patch-2_6_7-vs1_9_1_12
[linux-2.6.git] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*-
2  *
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "radeon.h"
31 #include "drmP.h"
32 #include "drm.h"
33 #include "drm_sarea.h"
34 #include "radeon_drm.h"
35 #include "radeon_drv.h"
36
37
38 /* ================================================================
39  * Helper functions for client state checking and fixup
40  */
41
42 static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_priv,
43                                                      drm_file_t *filp_priv,
44                                                      u32 *offset ) {
45         u32 off = *offset;
46
47         if ( off >= dev_priv->fb_location &&
48              off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
49                 return 0;
50
51         off += filp_priv->radeon_fb_delta;
52
53         DRM_DEBUG( "offset fixed up to 0x%x\n", off );
54
55         if ( off < dev_priv->fb_location ||
56              off >= ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
57                 return DRM_ERR( EINVAL );
58
59         *offset = off;
60
61         return 0;
62 }
63
64 static __inline__ int radeon_check_and_fixup_offset_user( drm_radeon_private_t *dev_priv,
65                                                           drm_file_t *filp_priv,
66                                                           u32 *offset ) {
67         u32 off;
68
69         DRM_GET_USER_UNCHECKED( off, offset );
70
71         if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &off ) )
72                 return DRM_ERR( EINVAL );
73
74         DRM_PUT_USER_UNCHECKED( offset, off );
75
76         return 0;
77 }
78
/* Validate (and fix up) any memory offsets contained in a client state
 * packet before it is emitted to the hardware.  'id' identifies which
 * RADEON_EMIT_* / R200_EMIT_* state packet 'data' holds; 'data' points
 * into the client's user-space command buffer, hence the *_user helper.
 * Returns 0 on success, or DRM_ERR(EINVAL) on an invalid offset or an
 * unknown packet id.
 */
static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv,
                                                      drm_file_t *filp_priv,
                                                      int id,
                                                      u32 *data ) {
        switch ( id ) {

        case RADEON_EMIT_PP_MISC:
                /* The depth buffer offset sits at a fixed register distance
                 * from the packet's base register; index into 'data' by that
                 * distance in dwords.
                 */
                if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
                                                         &data[( RADEON_RB3D_DEPTHOFFSET
                                                                 - RADEON_PP_MISC ) / 4] ) ) {
                        DRM_ERROR( "Invalid depth buffer offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case RADEON_EMIT_PP_CNTL:
                /* Same register-delta indexing for the colour buffer offset. */
                if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
                                                         &data[( RADEON_RB3D_COLOROFFSET
                                                                 - RADEON_PP_CNTL ) / 4] ) ) {
                        DRM_ERROR( "Invalid colour buffer offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case R200_EMIT_PP_TXOFFSET_0:
        case R200_EMIT_PP_TXOFFSET_1:
        case R200_EMIT_PP_TXOFFSET_2:
        case R200_EMIT_PP_TXOFFSET_3:
        case R200_EMIT_PP_TXOFFSET_4:
        case R200_EMIT_PP_TXOFFSET_5:
                /* R200 texture-offset packets carry the offset as their first
                 * (and, per the packet table, only) dword.
                 */
                if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
                                                         &data[0] ) ) {
                        DRM_ERROR( "Invalid R200 texture offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case RADEON_EMIT_PP_TXFILTER_0:
        case RADEON_EMIT_PP_TXFILTER_1:
        case RADEON_EMIT_PP_TXFILTER_2:
                /* R100 texture packets start at PP_TXFILTER_n; the offset
                 * register follows at a fixed delta.
                 */
                if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
                                                         &data[( RADEON_PP_TXOFFSET_0
                                                                 - RADEON_PP_TXFILTER_0 ) / 4] ) ) {
                        DRM_ERROR( "Invalid R100 texture offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case R200_EMIT_PP_CUBIC_OFFSETS_0:
        case R200_EMIT_PP_CUBIC_OFFSETS_1:
        case R200_EMIT_PP_CUBIC_OFFSETS_2:
        case R200_EMIT_PP_CUBIC_OFFSETS_3:
        case R200_EMIT_PP_CUBIC_OFFSETS_4:
        case R200_EMIT_PP_CUBIC_OFFSETS_5: {
                int i;
                /* Each cubic packet carries five face offsets (matching the
                 * len of 5 for PP_CUBIC_OFFSET_F1_n in the packet table);
                 * validate every one.
                 */
                for ( i = 0; i < 5; i++ ) {
                        if ( radeon_check_and_fixup_offset_user( dev_priv,
                                                                 filp_priv,
                                                                 &data[i] ) ) {
                                DRM_ERROR( "Invalid R200 cubic texture offset\n" );
                                return DRM_ERR( EINVAL );
                        }
                }
                break;
        }

        /* Everything below is pure state with no memory offsets, so no
         * fixup is required -- the ids only need to be recognized.
         */
        case RADEON_EMIT_RB3D_COLORPITCH:
        case RADEON_EMIT_RE_LINE_PATTERN:
        case RADEON_EMIT_SE_LINE_WIDTH:
        case RADEON_EMIT_PP_LUM_MATRIX:
        case RADEON_EMIT_PP_ROT_MATRIX_0:
        case RADEON_EMIT_RB3D_STENCILREFMASK:
        case RADEON_EMIT_SE_VPORT_XSCALE:
        case RADEON_EMIT_SE_CNTL:
        case RADEON_EMIT_SE_CNTL_STATUS:
        case RADEON_EMIT_RE_MISC:
        case RADEON_EMIT_PP_BORDER_COLOR_0:
        case RADEON_EMIT_PP_BORDER_COLOR_1:
        case RADEON_EMIT_PP_BORDER_COLOR_2:
        case RADEON_EMIT_SE_ZBIAS_FACTOR:
        case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
        case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
        case R200_EMIT_PP_TXCBLEND_0:
        case R200_EMIT_PP_TXCBLEND_1:
        case R200_EMIT_PP_TXCBLEND_2:
        case R200_EMIT_PP_TXCBLEND_3:
        case R200_EMIT_PP_TXCBLEND_4:
        case R200_EMIT_PP_TXCBLEND_5:
        case R200_EMIT_PP_TXCBLEND_6:
        case R200_EMIT_PP_TXCBLEND_7:
        case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
        case R200_EMIT_TFACTOR_0:
        case R200_EMIT_VTX_FMT_0:
        case R200_EMIT_VAP_CTL:
        case R200_EMIT_MATRIX_SELECT_0:
        case R200_EMIT_TEX_PROC_CTL_2:
        case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
        case R200_EMIT_PP_TXFILTER_0:
        case R200_EMIT_PP_TXFILTER_1:
        case R200_EMIT_PP_TXFILTER_2:
        case R200_EMIT_PP_TXFILTER_3:
        case R200_EMIT_PP_TXFILTER_4:
        case R200_EMIT_PP_TXFILTER_5:
        case R200_EMIT_VTE_CNTL:
        case R200_EMIT_OUTPUT_VTX_COMP_SEL:
        case R200_EMIT_PP_TAM_DEBUG3:
        case R200_EMIT_PP_CNTL_X:
        case R200_EMIT_RB3D_DEPTHXY_OFFSET:
        case R200_EMIT_RE_AUX_SCISSOR_CNTL:
        case R200_EMIT_RE_SCISSOR_TL_0:
        case R200_EMIT_RE_SCISSOR_TL_1:
        case R200_EMIT_RE_SCISSOR_TL_2:
        case R200_EMIT_SE_VAP_CNTL_STATUS:
        case R200_EMIT_SE_VTX_STATE_CNTL:
        case R200_EMIT_RE_POINTSIZE:
        case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
        case R200_EMIT_PP_CUBIC_FACES_0:
        case R200_EMIT_PP_CUBIC_FACES_1:
        case R200_EMIT_PP_CUBIC_FACES_2:
        case R200_EMIT_PP_CUBIC_FACES_3:
        case R200_EMIT_PP_CUBIC_FACES_4:
        case R200_EMIT_PP_CUBIC_FACES_5:
        case RADEON_EMIT_PP_TEX_SIZE_0:
        case RADEON_EMIT_PP_TEX_SIZE_1:
        case RADEON_EMIT_PP_TEX_SIZE_2:
        case R200_EMIT_RB3D_BLENDCOLOR:
                /* These packets don't contain memory offsets */
                break;

        default:
                /* Reject anything we don't explicitly know about. */
                DRM_ERROR( "Unknown state packet ID %d\n", id );
                return DRM_ERR( EINVAL );
        }

        return 0;
}
215
/* Validate the type-3 packet at the head of the client command buffer and
 * fix up any blit source/destination offsets it carries.  On success,
 * *cmdsz is set to the packet length in dwords (header + count field) and
 * 0 is returned; otherwise DRM_ERR(EFAULT) or DRM_ERR(EINVAL).
 *
 * NOTE(review): the first four dwords are copied from user space
 * unconditionally, even when *cmdsz or cmdbuf->bufsz is smaller than four
 * dwords -- presumably the caller guarantees enough readable buffer;
 * verify against the cmdbuf ioctl path.
 */
static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_priv,
                                                      drm_file_t *filp_priv,
                                                      drm_radeon_cmd_buffer_t *cmdbuf,
                                                      unsigned int *cmdsz ) {
        u32 tmp[4], *cmd = ( u32* )cmdbuf->buf;

        if ( DRM_COPY_FROM_USER_UNCHECKED( tmp, cmd, sizeof( tmp ) ) ) {
                DRM_ERROR( "Failed to copy data from user space\n" );
                return DRM_ERR( EFAULT );
        }

        /* Length in dwords: header dword + (count field in bits 29:16) + 1. */
        *cmdsz = 2 + ( ( tmp[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 );

        /* Top two bits of the header must identify a type-3 packet. */
        if ( ( tmp[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) {
                DRM_ERROR( "Not a type 3 packet\n" );
                return DRM_ERR( EINVAL );
        }

        /* The whole packet must fit inside the data the client supplied. */
        if ( 4 * *cmdsz > cmdbuf->bufsz ) {
                DRM_ERROR( "Packet size larger than size of data provided\n" );
                return DRM_ERR( EINVAL );
        }

        /* Check client state and fix it up if necessary */
        if ( tmp[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */
                u32 offset;

                /* If either a source or a destination pitch/offset is
                 * present, the first pitch/offset dword is tmp[2]; the
                 * offset occupies its low 22 bits, in 1KB units.
                 */
                if ( tmp[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL
                              | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
                        offset = tmp[2] << 10;
                        if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
                                DRM_ERROR( "Invalid first packet offset\n" );
                                return DRM_ERR( EINVAL );
                        }
                        tmp[2] = ( tmp[2] & 0xffc00000 ) | offset >> 10;
                }

                /* Only when BOTH source and destination pitch/offsets are
                 * supplied does a second pitch/offset dword follow in tmp[3].
                 */
                if ( ( tmp[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) &&
                     ( tmp[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
                        offset = tmp[3] << 10;
                        if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
                                DRM_ERROR( "Invalid second packet offset\n" );
                                return DRM_ERR( EINVAL );
                        }
                        tmp[3] = ( tmp[3] & 0xffc00000 ) | offset >> 10;
                }

                /* Write the fixed-up dwords back into the client buffer. */
                if ( DRM_COPY_TO_USER_UNCHECKED( cmd, tmp, sizeof( tmp ) ) ) {
                        DRM_ERROR( "Failed to copy data to user space\n" );
                        return DRM_ERR( EFAULT );
                }
        }

        return 0;
}
271
272
273 /* ================================================================
274  * CP hardware state programming functions
275  */
276
/* Program the hardware scissor to the given clip rectangle by emitting
 * RE_TOP_LEFT and RE_WIDTH_HEIGHT register writes to the CP ring.
 */
static __inline__ void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv,
                                          drm_clip_rect_t *box )
{
        RING_LOCALS;

        DRM_DEBUG( "   box:  x1=%d y1=%d  x2=%d y2=%d\n",
                   box->x1, box->y1, box->x2, box->y2 );

        BEGIN_RING( 4 );
        OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) );
        /* y in the high 16 bits, x in the low 16 bits. */
        OUT_RING( (box->y1 << 16) | box->x1 );
        OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) );
        /* x2/y2 are exclusive; the register takes inclusive coordinates. */
        OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );
        ADVANCE_RING();
}
292
293 /* Emit 1.1 state
294  */
295 static int radeon_emit_state( drm_radeon_private_t *dev_priv,
296                               drm_file_t *filp_priv,
297                               drm_radeon_context_regs_t *ctx,
298                               drm_radeon_texture_regs_t *tex,
299                               unsigned int dirty )
300 {
301         RING_LOCALS;
302         DRM_DEBUG( "dirty=0x%08x\n", dirty );
303
304         if ( dirty & RADEON_UPLOAD_CONTEXT ) {
305                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
306                                                     &ctx->rb3d_depthoffset ) ) {
307                         DRM_ERROR( "Invalid depth buffer offset\n" );
308                         return DRM_ERR( EINVAL );
309                 }
310
311                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
312                                                     &ctx->rb3d_coloroffset ) ) {
313                         DRM_ERROR( "Invalid depth buffer offset\n" );
314                         return DRM_ERR( EINVAL );
315                 }
316
317                 BEGIN_RING( 14 );
318                 OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
319                 OUT_RING( ctx->pp_misc );
320                 OUT_RING( ctx->pp_fog_color );
321                 OUT_RING( ctx->re_solid_color );
322                 OUT_RING( ctx->rb3d_blendcntl );
323                 OUT_RING( ctx->rb3d_depthoffset );
324                 OUT_RING( ctx->rb3d_depthpitch );
325                 OUT_RING( ctx->rb3d_zstencilcntl );
326                 OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
327                 OUT_RING( ctx->pp_cntl );
328                 OUT_RING( ctx->rb3d_cntl );
329                 OUT_RING( ctx->rb3d_coloroffset );
330                 OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
331                 OUT_RING( ctx->rb3d_colorpitch );
332                 ADVANCE_RING();
333         }
334
335         if ( dirty & RADEON_UPLOAD_VERTFMT ) {
336                 BEGIN_RING( 2 );
337                 OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
338                 OUT_RING( ctx->se_coord_fmt );
339                 ADVANCE_RING();
340         }
341
342         if ( dirty & RADEON_UPLOAD_LINE ) {
343                 BEGIN_RING( 5 );
344                 OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
345                 OUT_RING( ctx->re_line_pattern );
346                 OUT_RING( ctx->re_line_state );
347                 OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
348                 OUT_RING( ctx->se_line_width );
349                 ADVANCE_RING();
350         }
351
352         if ( dirty & RADEON_UPLOAD_BUMPMAP ) {
353                 BEGIN_RING( 5 );
354                 OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
355                 OUT_RING( ctx->pp_lum_matrix );
356                 OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
357                 OUT_RING( ctx->pp_rot_matrix_0 );
358                 OUT_RING( ctx->pp_rot_matrix_1 );
359                 ADVANCE_RING();
360         }
361
362         if ( dirty & RADEON_UPLOAD_MASKS ) {
363                 BEGIN_RING( 4 );
364                 OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
365                 OUT_RING( ctx->rb3d_stencilrefmask );
366                 OUT_RING( ctx->rb3d_ropcntl );
367                 OUT_RING( ctx->rb3d_planemask );
368                 ADVANCE_RING();
369         }
370
371         if ( dirty & RADEON_UPLOAD_VIEWPORT ) {
372                 BEGIN_RING( 7 );
373                 OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
374                 OUT_RING( ctx->se_vport_xscale );
375                 OUT_RING( ctx->se_vport_xoffset );
376                 OUT_RING( ctx->se_vport_yscale );
377                 OUT_RING( ctx->se_vport_yoffset );
378                 OUT_RING( ctx->se_vport_zscale );
379                 OUT_RING( ctx->se_vport_zoffset );
380                 ADVANCE_RING();
381         }
382
383         if ( dirty & RADEON_UPLOAD_SETUP ) {
384                 BEGIN_RING( 4 );
385                 OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
386                 OUT_RING( ctx->se_cntl );
387                 OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
388                 OUT_RING( ctx->se_cntl_status );
389                 ADVANCE_RING();
390         }
391
392         if ( dirty & RADEON_UPLOAD_MISC ) {
393                 BEGIN_RING( 2 );
394                 OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
395                 OUT_RING( ctx->re_misc );
396                 ADVANCE_RING();
397         }
398
399         if ( dirty & RADEON_UPLOAD_TEX0 ) {
400                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
401                                                     &tex[0].pp_txoffset ) ) {
402                         DRM_ERROR( "Invalid texture offset for unit 0\n" );
403                         return DRM_ERR( EINVAL );
404                 }
405
406                 BEGIN_RING( 9 );
407                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
408                 OUT_RING( tex[0].pp_txfilter );
409                 OUT_RING( tex[0].pp_txformat );
410                 OUT_RING( tex[0].pp_txoffset );
411                 OUT_RING( tex[0].pp_txcblend );
412                 OUT_RING( tex[0].pp_txablend );
413                 OUT_RING( tex[0].pp_tfactor );
414                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
415                 OUT_RING( tex[0].pp_border_color );
416                 ADVANCE_RING();
417         }
418
419         if ( dirty & RADEON_UPLOAD_TEX1 ) {
420                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
421                                                     &tex[1].pp_txoffset ) ) {
422                         DRM_ERROR( "Invalid texture offset for unit 1\n" );
423                         return DRM_ERR( EINVAL );
424                 }
425
426                 BEGIN_RING( 9 );
427                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
428                 OUT_RING( tex[1].pp_txfilter );
429                 OUT_RING( tex[1].pp_txformat );
430                 OUT_RING( tex[1].pp_txoffset );
431                 OUT_RING( tex[1].pp_txcblend );
432                 OUT_RING( tex[1].pp_txablend );
433                 OUT_RING( tex[1].pp_tfactor );
434                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
435                 OUT_RING( tex[1].pp_border_color );
436                 ADVANCE_RING();
437         }
438
439         if ( dirty & RADEON_UPLOAD_TEX2 ) {
440                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
441                                                     &tex[2].pp_txoffset ) ) {
442                         DRM_ERROR( "Invalid texture offset for unit 2\n" );
443                         return DRM_ERR( EINVAL );
444                 }
445
446                 BEGIN_RING( 9 );
447                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
448                 OUT_RING( tex[2].pp_txfilter );
449                 OUT_RING( tex[2].pp_txformat );
450                 OUT_RING( tex[2].pp_txoffset );
451                 OUT_RING( tex[2].pp_txcblend );
452                 OUT_RING( tex[2].pp_txablend );
453                 OUT_RING( tex[2].pp_tfactor );
454                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
455                 OUT_RING( tex[2].pp_border_color );
456                 ADVANCE_RING();
457         }
458
459         return 0;
460 }
461
462 /* Emit 1.2 state
463  */
/* Emit 1.2-style state: the additional z-bias registers (if dirty), then
 * delegate the rest of the state to the 1.1 path.  Returns whatever
 * radeon_emit_state() returns (0 or DRM_ERR(EINVAL)).
 */
static int radeon_emit_state2( drm_radeon_private_t *dev_priv,
                               drm_file_t *filp_priv,
                               drm_radeon_state_t *state )
{
        RING_LOCALS;

        if (state->dirty & RADEON_UPLOAD_ZBIAS) {
                BEGIN_RING( 3 );
                OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
                OUT_RING( state->context2.se_zbias_factor ); 
                OUT_RING( state->context2.se_zbias_constant ); 
                ADVANCE_RING();
        }

        /* The remaining context/texture state is identical to the 1.1
         * layout, so reuse that emitter (it also validates offsets).
         */
        return radeon_emit_state( dev_priv, filp_priv, &state->context,
                           state->tex, state->dirty );
}
481
482 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
483  * 1.3 cmdbuffers allow all previous state to be updated as well as
484  * the tcl scalar and vector areas.  
485  */
486 static struct { 
487         int start; 
488         int len; 
489         const char *name;
490 } packet[RADEON_MAX_STATE_PACKETS] = {
491         { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
492         { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
493         { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
494         { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
495         { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
496         { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
497         { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
498         { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
499         { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
500         { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
501         { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
502         { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
503         { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
504         { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
505         { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
506         { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
507         { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
508         { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
509         { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
510         { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
511         { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
512         { R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
513         { R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
514         { R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
515         { R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
516         { R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
517         { R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
518         { R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
519         { R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
520         { R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
521         { R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
522         { R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
523         { R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
524         { R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
525         { R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
526         { R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
527         { R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
528         { R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
529         { R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
530         { R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
531         { R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
532         { R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
533         { R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
534         { R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
535         { R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
536         { R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
537         { R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
538         { R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
539         { R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
540         { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
541         { R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
542         { R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" }, 
543         { R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" }, 
544         { R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" }, 
545         { R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" }, 
546         { R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" }, 
547         { R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" }, 
548         { R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" }, 
549         { R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" }, 
550         { R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" }, 
551         { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
552         { R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
553         { R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
554         { R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
555         { R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
556         { R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
557         { R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
558         { R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
559         { R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
560         { R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
561         { R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
562         { R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
563         { R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
564         { RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
565         { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
566         { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_1" },
567         { R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
568 };
569
570
571
572 /* ================================================================
573  * Performance monitoring functions
574  */
575
/* Paint a solid (r,g,b) rectangle via a 2D PAINT_MULTI blit -- used by the
 * performance-box overlay.  x/y are relative to the first cliprect; the
 * box is drawn into whichever buffer is currently being rendered to
 * (front when page-flipped to page 1, otherwise back).
 */
static void radeon_clear_box( drm_radeon_private_t *dev_priv,
                              int x, int y, int w, int h,
                              int r, int g, int b )
{
        u32 color;
        RING_LOCALS;

        /* Position the box relative to the first clip rectangle. */
        x += dev_priv->sarea_priv->boxes[0].x1;
        y += dev_priv->sarea_priv->boxes[0].y1;

        /* Pack the 8-bit colour components into the framebuffer format. */
        switch ( dev_priv->color_fmt ) {
        case RADEON_COLOR_FORMAT_RGB565:
                color = (((r & 0xf8) << 8) |
                         ((g & 0xfc) << 3) |
                         ((b & 0xf8) >> 3));
                break;
        case RADEON_COLOR_FORMAT_ARGB8888:
        default:
                color = (((0xff) << 24) | (r << 16) | (g <<  8) | b);
                break;
        }

        /* Quiesce the 3D pipe and open the full write mask before the
         * 2D fill.
         */
        BEGIN_RING( 4 );
        RADEON_WAIT_UNTIL_3D_IDLE();            
        OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
        OUT_RING( 0xffffffff );
        ADVANCE_RING();

        BEGIN_RING( 6 );

        OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
        OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                  RADEON_GMC_BRUSH_SOLID_COLOR |
                  (dev_priv->color_fmt << 8) |
                  RADEON_GMC_SRC_DATATYPE_COLOR |
                  RADEON_ROP3_P |
                  RADEON_GMC_CLR_CMP_CNTL_DIS );

        /* Draw into the buffer currently being rendered to. */
        if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) { 
                OUT_RING( dev_priv->front_pitch_offset );
        } else {         
                OUT_RING( dev_priv->back_pitch_offset );
        } 

        OUT_RING( color );

        OUT_RING( (x << 16) | y );
        OUT_RING( (w << 16) | h );

        ADVANCE_RING();
}
627
/* Draw the on-screen performance-monitoring boxes based on the stats
 * accumulated since the last frame, then reset the stats.
 */
static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
{
        /* Collapse various things into a wait flag -- trying to
         * guess if userspace slept -- better just to have them tell us.
         */
        if (dev_priv->stats.last_frame_reads > 1 ||
            dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
                dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
        }

        /* Spinning on the freelist also counts as waiting. */
        if (dev_priv->stats.freelist_loops) {
                dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
        }

        /* Purple box for page flipping
         */
        if ( dev_priv->stats.boxes & RADEON_BOX_FLIP ) 
                radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 );

        /* Red box if we have to wait for idle at any point
         */
        if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE ) 
                radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 );

        /* Blue box: lost context?
         */

        /* Yellow box for texture swaps
         */
        if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD ) 
                radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 );

        /* Green box if hardware never idles (as far as we can tell)
         */
        if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) ) 
                radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );


        /* Draw bars indicating number of buffers allocated 
         * (not a great measure, easily confused)
         */
        if (dev_priv->stats.requested_bufs) {
                /* Clamp so the bar never exceeds 100 pixels. */
                if (dev_priv->stats.requested_bufs > 100)
                        dev_priv->stats.requested_bufs = 100;

                radeon_clear_box( dev_priv, 4, 16,  
                                  dev_priv->stats.requested_bufs, 4,
                                  196, 128, 128 );
        }

        /* Start the next frame's statistics from zero. */
        memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );

}
681 /* ================================================================
682  * CP command dispatch functions
683  */
684
/* Clear any combination of the front/back color, depth and stencil
 * buffers, one pass per cliprect in the SAREA.
 *
 * Color buffers are cleared with 2D solid fills (CNTL_PAINT_MULTI);
 * depth/stencil must be cleared by rendering screen-aligned rectangles
 * through the 3D engine, with separate register setup for R200-class
 * vs older chips.  Finishes by emitting a clear-age scratch write that
 * the client-side 3D driver throttles on.
 */
static void radeon_cp_dispatch_clear( drm_device_t *dev,
                                      drm_radeon_clear_t *clear,
                                      drm_radeon_clear_rect_t *depth_boxes )
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
        int nbox = sarea_priv->nbox;
        drm_clip_rect_t *pbox = sarea_priv->boxes;
        unsigned int flags = clear->flags;
        u32 rb3d_cntl = 0, rb3d_stencilrefmask= 0;
        int i;
        RING_LOCALS;
        DRM_DEBUG( "flags = 0x%x\n", flags );

        dev_priv->stats.clears++;

        /* When page flipping is active and the "back" buffer is the one
         * currently being scanned out, swap the meaning of FRONT and
         * BACK so the physical buffers the client intends get cleared.
         */
        if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
                unsigned int tmp = flags;

                flags &= ~(RADEON_FRONT | RADEON_BACK);
                if ( tmp & RADEON_FRONT ) flags |= RADEON_BACK;
                if ( tmp & RADEON_BACK )  flags |= RADEON_FRONT;
        }

        if ( flags & (RADEON_FRONT | RADEON_BACK) ) {

                BEGIN_RING( 4 );

                /* Ensure the 3D stream is idle before doing a
                 * 2D fill to clear the front or back buffer.
                 */
                RADEON_WAIT_UNTIL_3D_IDLE();
                
                OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
                OUT_RING( clear->color_mask );

                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                /* One solid-color 2D fill per cliprect, per requested
                 * color buffer.
                 */
                for ( i = 0 ; i < nbox ; i++ ) {
                        int x = pbox[i].x1;
                        int y = pbox[i].y1;
                        int w = pbox[i].x2 - x;
                        int h = pbox[i].y2 - y;

                        DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
                                   x, y, w, h, flags );

                        if ( flags & RADEON_FRONT ) {
                                BEGIN_RING( 6 );
                                
                                OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
                                OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                          RADEON_GMC_BRUSH_SOLID_COLOR |
                                          (dev_priv->color_fmt << 8) |
                                          RADEON_GMC_SRC_DATATYPE_COLOR |
                                          RADEON_ROP3_P |
                                          RADEON_GMC_CLR_CMP_CNTL_DIS );

                                OUT_RING( dev_priv->front_pitch_offset );
                                OUT_RING( clear->clear_color );
                                
                                OUT_RING( (x << 16) | y );
                                OUT_RING( (w << 16) | h );
                                
                                ADVANCE_RING();
                        }
                        
                        if ( flags & RADEON_BACK ) {
                                BEGIN_RING( 6 );
                                
                                OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
                                OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                          RADEON_GMC_BRUSH_SOLID_COLOR |
                                          (dev_priv->color_fmt << 8) |
                                          RADEON_GMC_SRC_DATATYPE_COLOR |
                                          RADEON_ROP3_P |
                                          RADEON_GMC_CLR_CMP_CNTL_DIS );
                                
                                OUT_RING( dev_priv->back_pitch_offset );
                                OUT_RING( clear->clear_color );

                                OUT_RING( (x << 16) | y );
                                OUT_RING( (w << 16) | h );

                                ADVANCE_RING();
                        }
                }
        }

        /* We have to clear the depth and/or stencil buffers by
         * rendering a quad into just those buffers.  Thus, we have to
         * make sure the 3D engine is configured correctly.
         */
        if ( dev_priv->is_r200 &&
             (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {

                /* Shadow copies of the 3D registers we program for the
                 * clear; most disable everything except Z/stencil writes.
                 */
                int tempPP_CNTL;
                int tempRE_CNTL;
                int tempRB3D_CNTL;
                int tempRB3D_ZSTENCILCNTL;
                int tempRB3D_STENCILREFMASK;
                int tempRB3D_PLANEMASK;
                int tempSE_CNTL;
                int tempSE_VTE_CNTL;
                int tempSE_VTX_FMT_0;
                int tempSE_VTX_FMT_1;
                int tempSE_VAP_CNTL;
                int tempRE_AUX_SCISSOR_CNTL;

                tempPP_CNTL = 0;
                tempRE_CNTL = 0;

                tempRB3D_CNTL = depth_clear->rb3d_cntl;
                tempRB3D_CNTL &= ~(1<<15); /* unset radeon magic flag */

                tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
                tempRB3D_STENCILREFMASK = 0x0;

                tempSE_CNTL = depth_clear->se_cntl;



                /* Disable TCL */

                tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
                                   (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

                /* Zero planemask: no color buffer writes during the quad */
                tempRB3D_PLANEMASK = 0x0;

                tempRE_AUX_SCISSOR_CNTL = 0x0;

                tempSE_VTE_CNTL =
                        SE_VTE_CNTL__VTX_XY_FMT_MASK |
                        SE_VTE_CNTL__VTX_Z_FMT_MASK;

                /* Vertex format (X, Y, Z, W)*/
                tempSE_VTX_FMT_0 =
                        SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
                        SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
                tempSE_VTX_FMT_1 = 0x0;


                /* 
                 * Depth buffer specific enables 
                 */
                if (flags & RADEON_DEPTH) {
                        /* Enable depth buffer */
                        tempRB3D_CNTL |= RADEON_Z_ENABLE;
                } else {
                        /* Disable depth buffer */
                        tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
                }

                /* 
                 * Stencil buffer specific enables
                 */
                if ( flags & RADEON_STENCIL ) {
                        tempRB3D_CNTL |=  RADEON_STENCIL_ENABLE;
                        /* depth_mask doubles as the stencil ref/mask value */
                        tempRB3D_STENCILREFMASK = clear->depth_mask; 
                } else {
                        tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = 0x00000000;
                }

                /* 26 dwords: 2 for the idle wait + 12 register writes of
                 * 2 dwords each.
                 */
                BEGIN_RING( 26 );
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL );
                OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL );
                OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL );
                OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
                              tempRB3D_ZSTENCILCNTL );
                OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, 
                              tempRB3D_STENCILREFMASK );
                OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK );
                OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL );
                OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL );
                OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 );
                OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 );
                OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL );
                OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL, 
                              tempRE_AUX_SCISSOR_CNTL );
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                /* Draw one rectangle (3 vertices, RECT_LIST) per
                 * cliprect; W is hardwired to 1.0 (0x3f800000).
                 */
                for ( i = 0 ; i < nbox ; i++ ) {
                        
                        /* Funny that this should be required -- 
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect( dev_priv,
                                               &sarea_priv->boxes[i] );

                        BEGIN_RING( 14 );
                        OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) );
                        OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
                                   RADEON_PRIM_WALK_RING |
                                   (3 << RADEON_NUM_VERTICES_SHIFT)) );
                        OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x3f800000 );
                        OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x3f800000 );
                        OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x3f800000 );
                        ADVANCE_RING();
                }
        } 
        else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
                /* Pre-R200 path: same idea, fewer registers to set up. */

                rb3d_cntl = depth_clear->rb3d_cntl;

                if ( flags & RADEON_DEPTH ) {
                        rb3d_cntl |=  RADEON_Z_ENABLE;
                } else {
                        rb3d_cntl &= ~RADEON_Z_ENABLE;
                }

                if ( flags & RADEON_STENCIL ) {
                        rb3d_cntl |=  RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
                } else {
                        rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = 0x00000000;
                }

                BEGIN_RING( 13 );
                RADEON_WAIT_UNTIL_2D_IDLE();

                /* One packet writes both PP_CNTL (0) and RB3D_CNTL,
                 * which are adjacent registers.
                 */
                OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
                OUT_RING( 0x00000000 );
                OUT_RING( rb3d_cntl );
                
                OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
                              depth_clear->rb3d_zstencilcntl );
                OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
                              rb3d_stencilrefmask );
                OUT_RING_REG( RADEON_RB3D_PLANEMASK,
                              0x00000000 );
                OUT_RING_REG( RADEON_SE_CNTL,
                              depth_clear->se_cntl );
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                for ( i = 0 ; i < nbox ; i++ ) {
                        
                        /* Funny that this should be required -- 
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect( dev_priv,
                                               &sarea_priv->boxes[i] );

                        BEGIN_RING( 15 );

                        OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
                        OUT_RING( RADEON_VTX_Z_PRESENT |
                                  RADEON_VTX_PKCOLOR_PRESENT);
                        OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
                                   RADEON_PRIM_WALK_RING |
                                   RADEON_MAOS_ENABLE |
                                   RADEON_VTX_FMT_RADEON_MODE |
                                   (3 << RADEON_NUM_VERTICES_SHIFT)) );

                        /* Three vertices per rectangle: (x1,y1), (x1,y2),
                         * (x2,y2); packed color is zero (writes masked).
                         */
                        OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x0 );

                        OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x0 );

                        OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
                        OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
                        OUT_RING( 0x0 );

                        ADVANCE_RING();
                }
        }

        /* Increment the clear counter.  The client-side 3D driver must
         * wait on this value before performing the clear ioctl.  We
         * need this because the card's so damned fast...
         */
        dev_priv->sarea_priv->last_clear++;

        BEGIN_RING( 4 );

        RADEON_CLEAR_AGE( dev_priv->sarea_priv->last_clear );
        RADEON_WAIT_UNTIL_IDLE();

        ADVANCE_RING();
}
997
/* Copy the back buffer to the front buffer (a "swap" by blit) for each
 * cliprect in the SAREA, then emit a frame-age write that the client
 * uses to throttle its framerate.
 */
static void radeon_cp_dispatch_swap( drm_device_t *dev )
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        int nbox = sarea_priv->nbox;
        drm_clip_rect_t *pbox = sarea_priv->boxes;
        int i;
        RING_LOCALS;
        DRM_DEBUG( "\n" );

        /* Do some trivial performance monitoring...
         */
        if (dev_priv->do_boxes)
                radeon_cp_performance_boxes( dev_priv );


        /* Wait for the 3D stream to idle before dispatching the bitblt.
         * This will prevent data corruption between the two streams.
         */
        BEGIN_RING( 2 );

        RADEON_WAIT_UNTIL_3D_IDLE();

        ADVANCE_RING();

        /* One screen-to-screen blit per cliprect. */
        for ( i = 0 ; i < nbox ; i++ ) {
                int x = pbox[i].x1;
                int y = pbox[i].y1;
                int w = pbox[i].x2 - x;
                int h = pbox[i].y2 - y;

                DRM_DEBUG( "dispatch swap %d,%d-%d,%d\n",
                           x, y, w, h );

                BEGIN_RING( 7 );

                OUT_RING( CP_PACKET3( RADEON_CNTL_BITBLT_MULTI, 5 ) );
                OUT_RING( RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                          RADEON_GMC_BRUSH_NONE |
                          (dev_priv->color_fmt << 8) |
                          RADEON_GMC_SRC_DATATYPE_COLOR |
                          RADEON_ROP3_S |
                          RADEON_DP_SRC_SOURCE_MEMORY |
                          RADEON_GMC_CLR_CMP_CNTL_DIS |
                          RADEON_GMC_WR_MSK_DIS );
                
                /* Make this work even if front & back are flipped:
                 */
                if (dev_priv->current_page == 0) {
                        OUT_RING( dev_priv->back_pitch_offset );
                        OUT_RING( dev_priv->front_pitch_offset );
                } 
                else {
                        OUT_RING( dev_priv->front_pitch_offset );
                        OUT_RING( dev_priv->back_pitch_offset );
                }

                /* Source xy, destination xy (identical), then width/height */
                OUT_RING( (x << 16) | y );
                OUT_RING( (x << 16) | y );
                OUT_RING( (w << 16) | h );

                ADVANCE_RING();
        }

        /* Increment the frame counter.  The client-side 3D driver must
         * throttle the framerate by waiting for this value before
         * performing the swapbuffer ioctl.
         */
        dev_priv->sarea_priv->last_frame++;

        BEGIN_RING( 4 );

        RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
        RADEON_WAIT_UNTIL_2D_IDLE();

        ADVANCE_RING();
}
1076
/* Perform a page flip: point both CRTCs at the buffer that is not
 * currently displayed, toggle current_page, and emit a frame-age write
 * for client-side throttling.  No pixel copying is done.
 */
static void radeon_cp_dispatch_flip( drm_device_t *dev )
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_sarea_t *sarea = (drm_sarea_t *)dev_priv->sarea->handle;
        /* Flip to whichever buffer is NOT being scanned out now. */
        int offset = (dev_priv->current_page == 1)
                   ? dev_priv->front_offset : dev_priv->back_offset;
        RING_LOCALS;
        DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n", 
                __FUNCTION__, 
                dev_priv->current_page,
                dev_priv->sarea_priv->pfCurrentPage);

        /* Do some trivial performance monitoring...
         */
        if (dev_priv->do_boxes) {
                dev_priv->stats.boxes |= RADEON_BOX_FLIP;
                radeon_cp_performance_boxes( dev_priv );
        }

        /* Update the frame offsets for both CRTCs
         */
        BEGIN_RING( 6 );

        RADEON_WAIT_UNTIL_3D_IDLE();
        /* CRTC1 base: frame origin in bytes, 8-byte aligned.
         * NOTE(review): (color_fmt - 2) appears to serve as bytes-per-
         * pixel for the supported color formats -- confirm against the
         * RADEON_COLOR_FORMAT_* encoding.
         */
        OUT_RING_REG( RADEON_CRTC_OFFSET, ( ( sarea->frame.y * dev_priv->front_pitch
                                              + sarea->frame.x 
                                              * ( dev_priv->color_fmt - 2 ) ) & ~7 )
                                          + offset );
        OUT_RING_REG( RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
                                           + offset );

        ADVANCE_RING();

        /* Increment the frame counter.  The client-side 3D driver must
         * throttle the framerate by waiting for this value before
         * performing the swapbuffer ioctl.
         */
        dev_priv->sarea_priv->last_frame++;
        /* Toggle the page and publish it to the SAREA for clients. */
        dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
                                              1 - dev_priv->current_page;

        BEGIN_RING( 2 );

        RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );

        ADVANCE_RING();
}
1124
1125 static int bad_prim_vertex_nr( int primitive, int nr )
1126 {
1127         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1128         case RADEON_PRIM_TYPE_NONE:
1129         case RADEON_PRIM_TYPE_POINT:
1130                 return nr < 1;
1131         case RADEON_PRIM_TYPE_LINE:
1132                 return (nr & 1) || nr == 0;
1133         case RADEON_PRIM_TYPE_LINE_STRIP:
1134                 return nr < 2;
1135         case RADEON_PRIM_TYPE_TRI_LIST:
1136         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1137         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1138         case RADEON_PRIM_TYPE_RECT_LIST:
1139                 return nr % 3 || nr == 0;
1140         case RADEON_PRIM_TYPE_TRI_FAN:
1141         case RADEON_PRIM_TYPE_TRI_STRIP:
1142                 return nr < 3;
1143         default:
1144                 return 1;
1145         }       
1146 }
1147
1148
1149
/* Describes one TCL primitive to dispatch from a client vertex/index
 * buffer; consumed by radeon_cp_dispatch_vertex() and
 * radeon_cp_dispatch_indices().
 */
typedef struct {
        unsigned int start;      /* byte offset of first data in the buffer */
        unsigned int finish;     /* byte offset just past the end of the data */
        unsigned int prim;       /* hardware primitive type/flags */
        unsigned int numverts;   /* number of vertices to draw */
        unsigned int offset;     /* vertex data offset (indexed prims only) */
        unsigned int vc_format;  /* vertex component format word */
} drm_radeon_tcl_prim_t;
1158
/* Emit a vertex-buffer draw (RNDR_GEN_INDX_PRIM, list walk) once per
 * SAREA cliprect, re-emitting the clip rect before each pass.  Rejects
 * primitives whose vertex count is illegal for their type.
 */
static void radeon_cp_dispatch_vertex( drm_device_t *dev,
                                       drm_buf_t *buf,
                                       drm_radeon_tcl_prim_t *prim )

{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        /* GPU address of the vertex data within the GART buffer area */
        int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
        int numverts = (int)prim->numverts;
        int nbox = sarea_priv->nbox;
        int i = 0;
        RING_LOCALS;

        DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
                  prim->prim,
                  prim->vc_format,
                  prim->start,
                  prim->finish,
                  prim->numverts);

        if (bad_prim_vertex_nr( prim->prim, prim->numverts )) {
                DRM_ERROR( "bad prim %x numverts %d\n", 
                           prim->prim, prim->numverts );
                return;
        }

        /* do/while so the draw is emitted at least once even when the
         * client supplied no cliprects (nbox == 0).
         */
        do {
                /* Emit the next cliprect */
                if ( i < nbox ) {
                        radeon_emit_clip_rect( dev_priv, 
                                               &sarea_priv->boxes[i] );
                }

                /* Emit the vertex buffer rendering commands */
                BEGIN_RING( 5 );

                OUT_RING( CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, 3 ) );
                OUT_RING( offset );
                OUT_RING( numverts );
                OUT_RING( prim->vc_format );
                OUT_RING( prim->prim | RADEON_PRIM_WALK_LIST |
                          RADEON_COLOR_ORDER_RGBA |
                          RADEON_VTX_FMT_RADEON_MODE |
                          (numverts << RADEON_NUM_VERTICES_SHIFT) );

                ADVANCE_RING();

                i++;
        } while ( i < nbox );
}
1209
1210
1211
/* Mark a DMA buffer as pending and stamp it with a new dispatch age.
 * The buffer may be reclaimed by the freelist once the CP has executed
 * past the emitted age write.
 */
static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
        RING_LOCALS;

        /* Tag the buffer with the next dispatch age. */
        buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

        /* Emit the vertex buffer age */
        BEGIN_RING( 2 );
        RADEON_DISPATCH_AGE( buf_priv->age );
        ADVANCE_RING();

        buf->pending = 1;
        buf->used = 0;
}
1228
/* Fire the [start, end) byte range of a DMA buffer as a CP indirect
 * buffer.  Pads odd-dword ranges with a Type-2 NOP packet, since the
 * CP requires an even number of dwords.  A zero-length range is a
 * no-op.
 */
static void radeon_cp_dispatch_indirect( drm_device_t *dev,
                                         drm_buf_t *buf,
                                         int start, int end )
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        RING_LOCALS;
        DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n",
                   buf->idx, start, end );

        if ( start != end ) {
                /* GPU address of the range within the GART buffer area */
                int offset = (dev_priv->gart_buffers_offset
                              + buf->offset + start);
                /* Byte length rounded up to whole dwords */
                int dwords = (end - start + 3) / sizeof(u32);

                /* Indirect buffer data must be an even number of
                 * dwords, so if we've been given an odd number we must
                 * pad the data with a Type-2 CP packet.
                 */
                if ( dwords & 1 ) {
                        u32 *data = (u32 *)
                                ((char *)dev_priv->buffers->handle
                                 + buf->offset + start);
                        data[dwords++] = RADEON_CP_PACKET2;
                }

                /* Fire off the indirect buffer */
                BEGIN_RING( 3 );

                OUT_RING( CP_PACKET0( RADEON_CP_IB_BASE, 1 ) );
                OUT_RING( offset );
                OUT_RING( dwords );

                ADVANCE_RING();
        }
}
1264
1265
/* Dispatch an indexed primitive.  The draw packet header is written
 * directly into the element buffer in front of the client's index data
 * (RADEON_INDEX_PRIM_OFFSET bytes are reserved for it), then the whole
 * range is fired as an indirect buffer once per SAREA cliprect.
 */
static void radeon_cp_dispatch_indices( drm_device_t *dev,
                                        drm_buf_t *elt_buf,
                                        drm_radeon_tcl_prim_t *prim )
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        /* GPU address of the vertex data the indices refer to */
        int offset = dev_priv->gart_buffers_offset + prim->offset;
        u32 *data;
        int dwords;
        int i = 0;
        /* Index data begins after the reserved packet-header space */
        int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
        int count = (prim->finish - start) / sizeof(u16);
        int nbox = sarea_priv->nbox;

        DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
                  prim->prim,
                  prim->vc_format,
                  prim->start,
                  prim->finish,
                  prim->offset,
                  prim->numverts);

        if (bad_prim_vertex_nr( prim->prim, count )) {
                DRM_ERROR( "bad prim %x count %d\n", 
                           prim->prim, count );
                return;
        }


        /* Range must leave room for the header, and the start of the
         * range must be 8-byte aligned.
         */
        if ( start >= prim->finish ||
             (prim->start & 0x7) ) {
                DRM_ERROR( "buffer prim %d\n", prim->prim );
                return;
        }

        dwords = (prim->finish - prim->start + 3) / sizeof(u32);

        /* Build the draw packet header in place, ahead of the indices. */
        data = (u32 *)((char *)dev_priv->buffers->handle +
                       elt_buf->offset + prim->start);

        data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
        data[1] = offset;
        data[2] = prim->numverts;
        data[3] = prim->vc_format;
        data[4] = (prim->prim |
                   RADEON_PRIM_WALK_IND |
                   RADEON_COLOR_ORDER_RGBA |
                   RADEON_VTX_FMT_RADEON_MODE |
                   (count << RADEON_NUM_VERTICES_SHIFT) );

        /* do/while so the draw fires at least once even with no
         * cliprects (nbox == 0).
         */
        do {
                if ( i < nbox ) 
                        radeon_emit_clip_rect( dev_priv, 
                                               &sarea_priv->boxes[i] );

                radeon_cp_dispatch_indirect( dev, elt_buf,
                                             prim->start,
                                             prim->finish );

                i++;
        } while ( i < nbox );

}
1329
1330 #define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32))
1331
1332 static int radeon_cp_dispatch_texture( DRMFILE filp,
1333                                        drm_device_t *dev,
1334                                        drm_radeon_texture_t *tex,
1335                                        drm_radeon_tex_image_t *image )
1336 {
1337         drm_radeon_private_t *dev_priv = dev->dev_private;
1338         drm_file_t *filp_priv;
1339         drm_buf_t *buf;
1340         u32 format;
1341         u32 *buffer;
1342         const u8 *data;
1343         int size, dwords, tex_width, blit_width;
1344         u32 height;
1345         int i;
1346         RING_LOCALS;
1347
1348         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
1349
1350         if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &tex->offset ) ) {
1351                 DRM_ERROR( "Invalid destination offset\n" );
1352                 return DRM_ERR( EINVAL );
1353         }
1354
1355         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1356
1357         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1358          * up with the texture data from the host data blit, otherwise
1359          * part of the texture image may be corrupted.
1360          */
1361         BEGIN_RING( 4 );
1362         RADEON_FLUSH_CACHE();
1363         RADEON_WAIT_UNTIL_IDLE();
1364         ADVANCE_RING();
1365
1366 #ifdef __BIG_ENDIAN
1367         /* The Mesa texture functions provide the data in little endian as the
1368          * chip wants it, but we need to compensate for the fact that the CP
1369          * ring gets byte-swapped
1370          */
1371         BEGIN_RING( 2 );
1372         OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
1373         ADVANCE_RING();
1374 #endif
1375
1376
1377         /* The compiler won't optimize away a division by a variable,
1378          * even if the only legal values are powers of two.  Thus, we'll
1379          * use a shift instead.
1380          */
1381         switch ( tex->format ) {
1382         case RADEON_TXFORMAT_ARGB8888:
1383         case RADEON_TXFORMAT_RGBA8888:
1384                 format = RADEON_COLOR_FORMAT_ARGB8888;
1385                 tex_width = tex->width * 4;
1386                 blit_width = image->width * 4;
1387                 break;
1388         case RADEON_TXFORMAT_AI88:
1389         case RADEON_TXFORMAT_ARGB1555:
1390         case RADEON_TXFORMAT_RGB565:
1391         case RADEON_TXFORMAT_ARGB4444:
1392         case RADEON_TXFORMAT_VYUY422:
1393         case RADEON_TXFORMAT_YVYU422:
1394                 format = RADEON_COLOR_FORMAT_RGB565;
1395                 tex_width = tex->width * 2;
1396                 blit_width = image->width * 2;
1397                 break;
1398         case RADEON_TXFORMAT_I8:
1399         case RADEON_TXFORMAT_RGB332:
1400                 format = RADEON_COLOR_FORMAT_CI8;
1401                 tex_width = tex->width * 1;
1402                 blit_width = image->width * 1;
1403                 break;
1404         default:
1405                 DRM_ERROR( "invalid texture format %d\n", tex->format );
1406                 return DRM_ERR(EINVAL);
1407         }
1408
1409         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );
1410
1411         do {
1412                 DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1413                            tex->offset >> 10, tex->pitch, tex->format,
1414                            image->x, image->y, image->width, image->height );
1415
1416                 /* Make a copy of some parameters in case we have to
1417                  * update them for a multi-pass texture blit.
1418                  */
1419                 height = image->height;
1420                 data = (const u8 *)image->data;
1421                 
1422                 size = height * blit_width;
1423
1424                 if ( size > RADEON_MAX_TEXTURE_SIZE ) {
1425                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1426                         size = height * blit_width;
1427                 } else if ( size < 4 && size > 0 ) {
1428                         size = 4;
1429                 } else if ( size == 0 ) {
1430                         return 0;
1431                 }
1432
1433                 buf = radeon_freelist_get( dev );
1434                 if ( 0 && !buf ) {
1435                         radeon_do_cp_idle( dev_priv );
1436                         buf = radeon_freelist_get( dev );
1437                 }
1438                 if ( !buf ) {
1439                         DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
1440                         DRM_COPY_TO_USER( tex->image, image, sizeof(*image) );
1441                         return DRM_ERR(EAGAIN);
1442                 }
1443
1444
1445                 /* Dispatch the indirect buffer.
1446                  */
1447                 buffer = (u32*)((char*)dev_priv->buffers->handle + buf->offset);
1448                 dwords = size / 4;
1449                 buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
1450                 buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1451                              RADEON_GMC_BRUSH_NONE |
1452                              (format << 8) |
1453                              RADEON_GMC_SRC_DATATYPE_COLOR |
1454                              RADEON_ROP3_S |
1455                              RADEON_DP_SRC_SOURCE_HOST_DATA |
1456                              RADEON_GMC_CLR_CMP_CNTL_DIS |
1457                              RADEON_GMC_WR_MSK_DIS);
1458                 
1459                 buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
1460                 buffer[3] = 0xffffffff;
1461                 buffer[4] = 0xffffffff;
1462                 buffer[5] = (image->y << 16) | image->x;
1463                 buffer[6] = (height << 16) | image->width;
1464                 buffer[7] = dwords;
1465                 buffer += 8;
1466
1467                 if ( tex_width >= 32 ) {
1468                         /* Texture image width is larger than the minimum, so we
1469                          * can upload it directly.
1470                          */
1471                         if ( DRM_COPY_FROM_USER( buffer, data, 
1472                                                  dwords * sizeof(u32) ) ) {
1473                                 DRM_ERROR( "EFAULT on data, %d dwords\n", 
1474                                            dwords );
1475                                 return DRM_ERR(EFAULT);
1476                         }
1477                 } else {
1478                         /* Texture image width is less than the minimum, so we
1479                          * need to pad out each image scanline to the minimum
1480                          * width.
1481                          */
1482                         for ( i = 0 ; i < tex->height ; i++ ) {
1483                                 if ( DRM_COPY_FROM_USER( buffer, data, 
1484                                                          tex_width ) ) {
1485                                         DRM_ERROR( "EFAULT on pad, %d bytes\n",
1486                                                    tex_width );
1487                                         return DRM_ERR(EFAULT);
1488                                 }
1489                                 buffer += 8;
1490                                 data += tex_width;
1491                         }
1492                 }
1493
1494                 buf->filp = filp;
1495                 buf->used = (dwords + 8) * sizeof(u32);
1496                 radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
1497                 radeon_cp_discard_buffer( dev, buf );
1498
1499                 /* Update the input parameters for next time */
1500                 image->y += height;
1501                 image->height -= height;
1502                 image->data = (const u8 *)image->data + size;
1503         } while (image->height > 0);
1504
1505         /* Flush the pixel cache after the blit completes.  This ensures
1506          * the texture data is written out to memory before rendering
1507          * continues.
1508          */
1509         BEGIN_RING( 4 );
1510         RADEON_FLUSH_CACHE();
1511         RADEON_WAIT_UNTIL_2D_IDLE();
1512         ADVANCE_RING();
1513         return 0;
1514 }
1515
1516
1517 static void radeon_cp_dispatch_stipple( drm_device_t *dev, u32 *stipple )
1518 {
1519         drm_radeon_private_t *dev_priv = dev->dev_private;
1520         int i;
1521         RING_LOCALS;
1522         DRM_DEBUG( "\n" );
1523
1524         BEGIN_RING( 35 );
1525
1526         OUT_RING( CP_PACKET0( RADEON_RE_STIPPLE_ADDR, 0 ) );
1527         OUT_RING( 0x00000000 );
1528
1529         OUT_RING( CP_PACKET0_TABLE( RADEON_RE_STIPPLE_DATA, 31 ) );
1530         for ( i = 0 ; i < 32 ; i++ ) {
1531                 OUT_RING( stipple[i] );
1532         }
1533
1534         ADVANCE_RING();
1535 }
1536
1537
1538 /* ================================================================
1539  * IOCTL functions
1540  */
1541
1542 int radeon_cp_clear( DRM_IOCTL_ARGS )
1543 {
1544         DRM_DEVICE;
1545         drm_radeon_private_t *dev_priv = dev->dev_private;
1546         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1547         drm_radeon_clear_t clear;
1548         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
1549         DRM_DEBUG( "\n" );
1550
1551         LOCK_TEST_WITH_RETURN( dev, filp );
1552
1553         DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t *)data,
1554                              sizeof(clear) );
1555
1556         RING_SPACE_TEST_WITH_RETURN( dev_priv );
1557
1558         if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
1559                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
1560
1561         if ( DRM_COPY_FROM_USER( &depth_boxes, clear.depth_boxes,
1562                              sarea_priv->nbox * sizeof(depth_boxes[0]) ) )
1563                 return DRM_ERR(EFAULT);
1564
1565         radeon_cp_dispatch_clear( dev, &clear, depth_boxes );
1566
1567         COMMIT_RING();
1568         return 0;
1569 }
1570
1571
1572 /* Not sure why this isn't set all the time:
1573  */ 
static int radeon_do_init_pageflip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG( "\n" );

	/* Wait for the 3D engine to idle, then read-modify-write both
	 * CRTC offset control registers through the ring to set the
	 * FLIP_CNTL bit, enabling CP-driven page flipping on both CRTCs.
	 */
	BEGIN_RING( 6 );
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	OUT_RING( CP_PACKET0( RADEON_CRTC2_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC2_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	ADVANCE_RING();

	/* Start on page 0 and mirror the current page into the SAREA so
	 * clients can see which page is being scanned out.
	 */
	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
1595
1596 /* Called whenever a client dies, from DRM(release).
1597  * NOTE:  Lock isn't necessarily held when this is called!
1598  */
1599 int radeon_do_cleanup_pageflip( drm_device_t *dev )
1600 {
1601         drm_radeon_private_t *dev_priv = dev->dev_private;
1602         DRM_DEBUG( "\n" );
1603
1604         if (dev_priv->current_page != 0)
1605                 radeon_cp_dispatch_flip( dev );
1606
1607         dev_priv->page_flipping = 0;
1608         return 0;
1609 }
1610
1611 /* Swapping and flipping are different operations, need different ioctls.
1612  * They can & should be intermixed to support multiple 3d windows.  
1613  */
1614 int radeon_cp_flip( DRM_IOCTL_ARGS )
1615 {
1616         DRM_DEVICE;
1617         drm_radeon_private_t *dev_priv = dev->dev_private;
1618         DRM_DEBUG( "\n" );
1619
1620         LOCK_TEST_WITH_RETURN( dev, filp );
1621
1622         RING_SPACE_TEST_WITH_RETURN( dev_priv );
1623
1624         if (!dev_priv->page_flipping) 
1625                 radeon_do_init_pageflip( dev );
1626                 
1627         radeon_cp_dispatch_flip( dev );
1628
1629         COMMIT_RING();
1630         return 0;
1631 }
1632
1633 int radeon_cp_swap( DRM_IOCTL_ARGS )
1634 {
1635         DRM_DEVICE;
1636         drm_radeon_private_t *dev_priv = dev->dev_private;
1637         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1638         DRM_DEBUG( "\n" );
1639
1640         LOCK_TEST_WITH_RETURN( dev, filp );
1641
1642         RING_SPACE_TEST_WITH_RETURN( dev_priv );
1643
1644         if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
1645                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
1646
1647         radeon_cp_dispatch_swap( dev );
1648         dev_priv->sarea_priv->ctx_owner = 0;
1649
1650         COMMIT_RING();
1651         return 0;
1652 }
1653
1654 int radeon_cp_vertex( DRM_IOCTL_ARGS )
1655 {
1656         DRM_DEVICE;
1657         drm_radeon_private_t *dev_priv = dev->dev_private;
1658         drm_file_t *filp_priv;
1659         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1660         drm_device_dma_t *dma = dev->dma;
1661         drm_buf_t *buf;
1662         drm_radeon_vertex_t vertex;
1663         drm_radeon_tcl_prim_t prim;
1664
1665         LOCK_TEST_WITH_RETURN( dev, filp );
1666
1667         if ( !dev_priv ) {
1668                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1669                 return DRM_ERR(EINVAL);
1670         }
1671
1672         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
1673
1674         DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t *)data,
1675                              sizeof(vertex) );
1676
1677         DRM_DEBUG( "pid=%d index=%d count=%d discard=%d\n",
1678                    DRM_CURRENTPID,
1679                    vertex.idx, vertex.count, vertex.discard );
1680
1681         if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
1682                 DRM_ERROR( "buffer index %d (of %d max)\n",
1683                            vertex.idx, dma->buf_count - 1 );
1684                 return DRM_ERR(EINVAL);
1685         }
1686         if ( vertex.prim < 0 ||
1687              vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
1688                 DRM_ERROR( "buffer prim %d\n", vertex.prim );
1689                 return DRM_ERR(EINVAL);
1690         }
1691
1692         RING_SPACE_TEST_WITH_RETURN( dev_priv );
1693         VB_AGE_TEST_WITH_RETURN( dev_priv );
1694
1695         buf = dma->buflist[vertex.idx];
1696
1697         if ( buf->filp != filp ) {
1698                 DRM_ERROR( "process %d using buffer owned by %p\n",
1699                            DRM_CURRENTPID, buf->filp );
1700                 return DRM_ERR(EINVAL);
1701         }
1702         if ( buf->pending ) {
1703                 DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
1704                 return DRM_ERR(EINVAL);
1705         }
1706
1707         /* Build up a prim_t record:
1708          */
1709         if (vertex.count) {
1710                 buf->used = vertex.count; /* not used? */
1711
1712                 if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
1713                         if ( radeon_emit_state( dev_priv, filp_priv,
1714                                                 &sarea_priv->context_state,
1715                                                 sarea_priv->tex_state,
1716                                                 sarea_priv->dirty ) ) {
1717                                 DRM_ERROR( "radeon_emit_state failed\n" );
1718                                 return DRM_ERR( EINVAL );
1719                         }
1720
1721                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
1722                                                RADEON_UPLOAD_TEX1IMAGES |
1723                                                RADEON_UPLOAD_TEX2IMAGES |
1724                                                RADEON_REQUIRE_QUIESCENCE);
1725                 }
1726
1727                 prim.start = 0;
1728                 prim.finish = vertex.count; /* unused */
1729                 prim.prim = vertex.prim;
1730                 prim.numverts = vertex.count;
1731                 prim.vc_format = dev_priv->sarea_priv->vc_format;
1732                 
1733                 radeon_cp_dispatch_vertex( dev, buf, &prim );
1734         }
1735
1736         if (vertex.discard) {
1737                 radeon_cp_discard_buffer( dev, buf );
1738         }
1739
1740         COMMIT_RING();
1741         return 0;
1742 }
1743
1744 int radeon_cp_indices( DRM_IOCTL_ARGS )
1745 {
1746         DRM_DEVICE;
1747         drm_radeon_private_t *dev_priv = dev->dev_private;
1748         drm_file_t *filp_priv;
1749         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1750         drm_device_dma_t *dma = dev->dma;
1751         drm_buf_t *buf;
1752         drm_radeon_indices_t elts;
1753         drm_radeon_tcl_prim_t prim;
1754         int count;
1755
1756         LOCK_TEST_WITH_RETURN( dev, filp );
1757
1758         if ( !dev_priv ) {
1759                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1760                 return DRM_ERR(EINVAL);
1761         }
1762
1763         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
1764
1765         DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t *)data,
1766                              sizeof(elts) );
1767
1768         DRM_DEBUG( "pid=%d index=%d start=%d end=%d discard=%d\n",
1769                    DRM_CURRENTPID,
1770                    elts.idx, elts.start, elts.end, elts.discard );
1771
1772         if ( elts.idx < 0 || elts.idx >= dma->buf_count ) {
1773                 DRM_ERROR( "buffer index %d (of %d max)\n",
1774                            elts.idx, dma->buf_count - 1 );
1775                 return DRM_ERR(EINVAL);
1776         }
1777         if ( elts.prim < 0 ||
1778              elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
1779                 DRM_ERROR( "buffer prim %d\n", elts.prim );
1780                 return DRM_ERR(EINVAL);
1781         }
1782
1783         RING_SPACE_TEST_WITH_RETURN( dev_priv );
1784         VB_AGE_TEST_WITH_RETURN( dev_priv );
1785
1786         buf = dma->buflist[elts.idx];
1787
1788         if ( buf->filp != filp ) {
1789                 DRM_ERROR( "process %d using buffer owned by %p\n",
1790                            DRM_CURRENTPID, buf->filp );
1791                 return DRM_ERR(EINVAL);
1792         }
1793         if ( buf->pending ) {
1794                 DRM_ERROR( "sending pending buffer %d\n", elts.idx );
1795                 return DRM_ERR(EINVAL);
1796         }
1797
1798         count = (elts.end - elts.start) / sizeof(u16);
1799         elts.start -= RADEON_INDEX_PRIM_OFFSET;
1800
1801         if ( elts.start & 0x7 ) {
1802                 DRM_ERROR( "misaligned buffer 0x%x\n", elts.start );
1803                 return DRM_ERR(EINVAL);
1804         }
1805         if ( elts.start < buf->used ) {
1806                 DRM_ERROR( "no header 0x%x - 0x%x\n", elts.start, buf->used );
1807                 return DRM_ERR(EINVAL);
1808         }
1809
1810         buf->used = elts.end;
1811
1812         if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
1813                 if ( radeon_emit_state( dev_priv, filp_priv,
1814                                         &sarea_priv->context_state,
1815                                         sarea_priv->tex_state,
1816                                         sarea_priv->dirty ) ) {
1817                         DRM_ERROR( "radeon_emit_state failed\n" );
1818                         return DRM_ERR( EINVAL );
1819                 }
1820
1821                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
1822                                        RADEON_UPLOAD_TEX1IMAGES |
1823                                        RADEON_UPLOAD_TEX2IMAGES |
1824                                        RADEON_REQUIRE_QUIESCENCE);
1825         }
1826
1827
1828         /* Build up a prim_t record:
1829          */
1830         prim.start = elts.start;
1831         prim.finish = elts.end; 
1832         prim.prim = elts.prim;
1833         prim.offset = 0;        /* offset from start of dma buffers */
1834         prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
1835         prim.vc_format = dev_priv->sarea_priv->vc_format;
1836         
1837         radeon_cp_dispatch_indices( dev, buf, &prim );
1838         if (elts.discard) {
1839                 radeon_cp_discard_buffer( dev, buf );
1840         }
1841
1842         COMMIT_RING();
1843         return 0;
1844 }
1845
1846 int radeon_cp_texture( DRM_IOCTL_ARGS )
1847 {
1848         DRM_DEVICE;
1849         drm_radeon_private_t *dev_priv = dev->dev_private;
1850         drm_radeon_texture_t tex;
1851         drm_radeon_tex_image_t image;
1852         int ret;
1853
1854         LOCK_TEST_WITH_RETURN( dev, filp );
1855
1856         DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t *)data, sizeof(tex) );
1857
1858         if ( tex.image == NULL ) {
1859                 DRM_ERROR( "null texture image!\n" );
1860                 return DRM_ERR(EINVAL);
1861         }
1862
1863         if ( DRM_COPY_FROM_USER( &image,
1864                              (drm_radeon_tex_image_t *)tex.image,
1865                              sizeof(image) ) )
1866                 return DRM_ERR(EFAULT);
1867
1868         RING_SPACE_TEST_WITH_RETURN( dev_priv );
1869         VB_AGE_TEST_WITH_RETURN( dev_priv );
1870
1871         ret = radeon_cp_dispatch_texture( filp, dev, &tex, &image );
1872
1873         COMMIT_RING();
1874         return ret;
1875 }
1876
1877 int radeon_cp_stipple( DRM_IOCTL_ARGS )
1878 {
1879         DRM_DEVICE;
1880         drm_radeon_private_t *dev_priv = dev->dev_private;
1881         drm_radeon_stipple_t stipple;
1882         u32 mask[32];
1883
1884         LOCK_TEST_WITH_RETURN( dev, filp );
1885
1886         DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t *)data,
1887                              sizeof(stipple) );
1888
1889         if ( DRM_COPY_FROM_USER( &mask, stipple.mask, 32 * sizeof(u32) ) )
1890                 return DRM_ERR(EFAULT);
1891
1892         RING_SPACE_TEST_WITH_RETURN( dev_priv );
1893
1894         radeon_cp_dispatch_stipple( dev, mask );
1895
1896         COMMIT_RING();
1897         return 0;
1898 }
1899
/* Indirect ioctl: dispatch a raw command buffer from the X server.
 * The buffer contents are NOT verified, so this path is restricted to
 * privileged clients (see the comment before the dispatch below).
 */
int radeon_cp_indirect( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( indirect, (drm_radeon_indirect_t *)data,
			     sizeof(indirect) );

	DRM_DEBUG( "indirect: idx=%d s=%d e=%d d=%d\n",
		   indirect.idx, indirect.start,
		   indirect.end, indirect.discard );

	/* Validate the client-supplied buffer index. */
	if ( indirect.idx < 0 || indirect.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   indirect.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	/* The buffer must belong to this client and not already be queued. */
	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", indirect.idx );
		return DRM_ERR(EINVAL);
	}

	/* The new range must start at or beyond the already-dispatched
	 * part of the buffer.  NOTE(review): indirect.end is not checked
	 * against buf->total here — confirm whether callers are trusted
	 * enough for that to be safe.
	 */
	if ( indirect.start < buf->used ) {
		DRM_ERROR( "reusing indirect: start=0x%x actual=0x%x\n",
			   indirect.start, buf->used );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING( 2 );

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect( dev, buf, indirect.start, indirect.end );
	if (indirect.discard) {
		radeon_cp_discard_buffer( dev, buf );
	}


	COMMIT_RING();
	return 0;
}
1974
1975 int radeon_cp_vertex2( DRM_IOCTL_ARGS )
1976 {
1977         DRM_DEVICE;
1978         drm_radeon_private_t *dev_priv = dev->dev_private;
1979         drm_file_t *filp_priv;
1980         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1981         drm_device_dma_t *dma = dev->dma;
1982         drm_buf_t *buf;
1983         drm_radeon_vertex2_t vertex;
1984         int i;
1985         unsigned char laststate;
1986
1987         LOCK_TEST_WITH_RETURN( dev, filp );
1988
1989         if ( !dev_priv ) {
1990                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1991                 return DRM_ERR(EINVAL);
1992         }
1993
1994         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
1995
1996         DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex2_t *)data,
1997                              sizeof(vertex) );
1998
1999         DRM_DEBUG( "pid=%d index=%d discard=%d\n",
2000                    DRM_CURRENTPID,
2001                    vertex.idx, vertex.discard );
2002
2003         if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
2004                 DRM_ERROR( "buffer index %d (of %d max)\n",
2005                            vertex.idx, dma->buf_count - 1 );
2006                 return DRM_ERR(EINVAL);
2007         }
2008
2009         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2010         VB_AGE_TEST_WITH_RETURN( dev_priv );
2011
2012         buf = dma->buflist[vertex.idx];
2013
2014         if ( buf->filp != filp ) {
2015                 DRM_ERROR( "process %d using buffer owned by %p\n",
2016                            DRM_CURRENTPID, buf->filp );
2017                 return DRM_ERR(EINVAL);
2018         }
2019
2020         if ( buf->pending ) {
2021                 DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
2022                 return DRM_ERR(EINVAL);
2023         }
2024         
2025         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2026                 return DRM_ERR(EINVAL);
2027
2028         for (laststate = 0xff, i = 0 ; i < vertex.nr_prims ; i++) {
2029                 drm_radeon_prim_t prim;
2030                 drm_radeon_tcl_prim_t tclprim;
2031                 
2032                 if ( DRM_COPY_FROM_USER( &prim, &vertex.prim[i], sizeof(prim) ) )
2033                         return DRM_ERR(EFAULT);
2034                 
2035                 if ( prim.stateidx != laststate ) {
2036                         drm_radeon_state_t state;                              
2037                                 
2038                         if ( DRM_COPY_FROM_USER( &state, 
2039                                              &vertex.state[prim.stateidx], 
2040                                              sizeof(state) ) )
2041                                 return DRM_ERR(EFAULT);
2042
2043                         if ( radeon_emit_state2( dev_priv, filp_priv, &state ) ) {
2044                                 DRM_ERROR( "radeon_emit_state2 failed\n" );
2045                                 return DRM_ERR( EINVAL );
2046                         }
2047
2048                         laststate = prim.stateidx;
2049                 }
2050
2051                 tclprim.start = prim.start;
2052                 tclprim.finish = prim.finish;
2053                 tclprim.prim = prim.prim;
2054                 tclprim.vc_format = prim.vc_format;
2055
2056                 if ( prim.prim & RADEON_PRIM_WALK_IND ) {
2057                         tclprim.offset = prim.numverts * 64;
2058                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2059
2060                         radeon_cp_dispatch_indices( dev, buf, &tclprim );
2061                 } else {
2062                         tclprim.numverts = prim.numverts;
2063                         tclprim.offset = 0; /* not used */
2064
2065                         radeon_cp_dispatch_vertex( dev, buf, &tclprim );
2066                 }
2067                 
2068                 if (sarea_priv->nbox == 1)
2069                         sarea_priv->nbox = 0;
2070         }
2071
2072         if ( vertex.discard ) {
2073                 radeon_cp_discard_buffer( dev, buf );
2074         }
2075
2076         COMMIT_RING();
2077         return 0;
2078 }
2079
2080
/* Emit one register-write packet (CP_PACKET0) from a client command
 * stream.  The packet id indexes the static packet[] table, which
 * supplies the register start and dword count; the payload is verified
 * and fixed up before being copied to the ring.
 */
static int radeon_emit_packets( 
	drm_radeon_private_t *dev_priv,
	drm_file_t *filp_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;
   
	/* NOTE(review): only the upper bound is checked here; this is
	 * safe iff packet_id is an unsigned field — confirm in
	 * drm_radeon_cmd_header_t.
	 */
	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	/* The client must have supplied at least sz dwords of payload. */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR( "Packet size provided larger than data provided\n" );
		return DRM_ERR(EINVAL);
	}

	if ( radeon_check_and_fixup_packets( dev_priv, filp_priv, id, data ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return DRM_ERR( EINVAL );
	}

	BEGIN_RING(sz+1);
	OUT_RING( CP_PACKET0( reg, (sz-1) ) );
	OUT_RING_USER_TABLE( data, sz );
	ADVANCE_RING();

	/* Advance past the consumed payload in the command buffer. */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2117
2118 static __inline__ int radeon_emit_scalars( 
2119         drm_radeon_private_t *dev_priv,
2120         drm_radeon_cmd_header_t header,
2121         drm_radeon_cmd_buffer_t *cmdbuf )
2122 {
2123         int sz = header.scalars.count;
2124         int *data = (int *)cmdbuf->buf;
2125         int start = header.scalars.offset;
2126         int stride = header.scalars.stride;
2127         RING_LOCALS;
2128
2129         BEGIN_RING( 3+sz );
2130         OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2131         OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2132         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2133         OUT_RING_USER_TABLE( data, sz );
2134         ADVANCE_RING();
2135         cmdbuf->buf += sz * sizeof(int);
2136         cmdbuf->bufsz -= sz * sizeof(int);
2137         return 0;
2138 }
2139
2140 /* God this is ugly
2141  */
2142 static __inline__ int radeon_emit_scalars2( 
2143         drm_radeon_private_t *dev_priv,
2144         drm_radeon_cmd_header_t header,
2145         drm_radeon_cmd_buffer_t *cmdbuf )
2146 {
2147         int sz = header.scalars.count;
2148         int *data = (int *)cmdbuf->buf;
2149         int start = ((unsigned int)header.scalars.offset) + 0x100;
2150         int stride = header.scalars.stride;
2151         RING_LOCALS;
2152
2153         BEGIN_RING( 3+sz );
2154         OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2155         OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2156         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2157         OUT_RING_USER_TABLE( data, sz );
2158         ADVANCE_RING();
2159         cmdbuf->buf += sz * sizeof(int);
2160         cmdbuf->bufsz -= sz * sizeof(int);
2161         return 0;
2162 }
2163
2164 static __inline__ int radeon_emit_vectors( 
2165         drm_radeon_private_t *dev_priv,
2166         drm_radeon_cmd_header_t header,
2167         drm_radeon_cmd_buffer_t *cmdbuf )
2168 {
2169         int sz = header.vectors.count;
2170         int *data = (int *)cmdbuf->buf;
2171         int start = header.vectors.offset;
2172         int stride = header.vectors.stride;
2173         RING_LOCALS;
2174
2175         BEGIN_RING( 3+sz );
2176         OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) );
2177         OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2178         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) );
2179         OUT_RING_USER_TABLE( data, sz );
2180         ADVANCE_RING();
2181
2182         cmdbuf->buf += sz * sizeof(int);
2183         cmdbuf->bufsz -= sz * sizeof(int);
2184         return 0;
2185 }
2186
2187
/* Emit one PACKET3 command from a client command stream after it has
 * been verified (and had any embedded offsets fixed up).
 */
static int radeon_emit_packet3( drm_device_t *dev,
				drm_file_t *filp_priv,
				drm_radeon_cmd_buffer_t *cmdbuf )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int *cmd = (int *)cmdbuf->buf, ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* Verification also computes the total packet length (cmdsz, in
	 * dwords) used for the ring copy and buffer advance below.
	 */
	if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
						     cmdbuf, &cmdsz ) ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return ret;
	}

	BEGIN_RING( cmdsz );
	OUT_RING_USER_TABLE( cmd, cmdsz );
	ADVANCE_RING();

	/* Advance past the consumed packet in the command buffer. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2213
2214
/* Emit one verified PACKET3 command once per client cliprect, setting
 * the scissor to each cliprect before re-emitting the command.  With
 * orig_nbox == 0 the command is skipped entirely (but still consumed
 * from the buffer).
 */
static int radeon_emit_packet3_cliprect( drm_device_t *dev,
					 drm_file_t *filp_priv,
					 drm_radeon_cmd_buffer_t *cmdbuf,
					 int orig_nbox )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int *cmd = (int *)cmdbuf->buf, ret;
	drm_clip_rect_t *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
						     cmdbuf, &cmdsz ) ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if ( i < cmdbuf->nbox ) {
			/* boxes was range-checked by the caller, so the
			 * unchecked copy is safe here.
			 */
			if (DRM_COPY_FROM_USER_UNCHECKED( &box, &boxes[i], sizeof(box) ))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if ( i ) {
				BEGIN_RING( 2 );
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect( dev_priv, &box );
		}
		
		BEGIN_RING( cmdsz );
		OUT_RING_USER_TABLE( cmd, cmdsz );
		ADVANCE_RING();

	} while ( ++i < cmdbuf->nbox );
	/* A single cliprect is consumed so subsequent commands in the
	 * same cmdbuf are emitted only once.
	 */
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

 out:
	/* Advance past the consumed packet in the command buffer. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2276
2277
2278 static int radeon_emit_wait( drm_device_t *dev, int flags )
2279 {
2280         drm_radeon_private_t *dev_priv = dev->dev_private;
2281         RING_LOCALS;
2282
2283         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2284         switch (flags) {
2285         case RADEON_WAIT_2D:
2286                 BEGIN_RING( 2 );
2287                 RADEON_WAIT_UNTIL_2D_IDLE(); 
2288                 ADVANCE_RING();
2289                 break;
2290         case RADEON_WAIT_3D:
2291                 BEGIN_RING( 2 );
2292                 RADEON_WAIT_UNTIL_3D_IDLE(); 
2293                 ADVANCE_RING();
2294                 break;
2295         case RADEON_WAIT_2D|RADEON_WAIT_3D:
2296                 BEGIN_RING( 2 );
2297                 RADEON_WAIT_UNTIL_IDLE(); 
2298                 ADVANCE_RING();
2299                 break;
2300         default:
2301                 return DRM_ERR(EINVAL);
2302         }
2303
2304         return 0;
2305 }
2306
/* Ioctl handler for RADEON_CMDBUF: parse and dispatch a user-supplied
 * command buffer.  Each entry starts with a drm_radeon_cmd_header_t whose
 * cmd_type selects the emit routine; the buffer pointer/size in cmdbuf are
 * advanced by the header here and by each emit routine for its payload.
 * Requires the hardware lock and a fully initialized device.
 */
int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = 0;
	int idx;
	drm_radeon_cmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	/* Copy only the fixed-size descriptor; the command bytes themselves
	 * stay in user space and are read incrementally below. */
	DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_radeon_cmd_buffer_t *)data,
			     sizeof(cmdbuf) );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );


	/* Validate both user ranges (command bytes and clip rects) before
	 * parsing, so the unchecked __get_user below is safe. */
	if (DRM_VERIFYAREA_READ( cmdbuf.buf, cmdbuf.bufsz ))
		return DRM_ERR(EFAULT);

	if (cmdbuf.nbox &&
	    DRM_VERIFYAREA_READ(cmdbuf.boxes, 
			 cmdbuf.nbox * sizeof(drm_clip_rect_t)))
		return DRM_ERR(EFAULT);

	orig_nbox = cmdbuf.nbox;

	/* Main parse loop: one header per iteration, payload consumed by the
	 * per-command emit routines.  Trailing bytes smaller than a header
	 * are silently ignored. */
	while ( cmdbuf.bufsz >= sizeof(header) ) {
		
		/* Area was verified above, so the unchecked variant is OK. */
		if (DRM_GET_USER_UNCHECKED( header.i, (int *)cmdbuf.buf )) {
			DRM_ERROR("__get_user %p\n", cmdbuf.buf);
			return DRM_ERR(EFAULT);
		}

		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET: 
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets( dev_priv, filp_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_packets failed\n");
				return DRM_ERR(EINVAL);
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				return DRM_ERR(EINVAL);
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				return DRM_ERR(EINVAL);
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			/* Reject out-of-range indices before touching buflist. */
			if ( idx < 0 || idx >= dma->buf_count ) {
				DRM_ERROR( "buffer index %d (of %d max)\n",
					   idx, dma->buf_count - 1 );
				return DRM_ERR(EINVAL);
			}

			/* Only the owning file may discard, and only buffers
			 * that are not already queued to the hardware. */
			buf = dma->buflist[idx];
			if ( buf->filp != filp || buf->pending ) {
				DRM_ERROR( "bad buffer %p %p %d\n",
					   buf->filp, filp, buf->pending);
				return DRM_ERR(EINVAL);
			}

			radeon_cp_discard_buffer( dev, buf );
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3( dev, filp_priv, &cmdbuf )) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				return DRM_ERR(EINVAL);
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			/* orig_nbox preserves the caller's box count; the
			 * cliprect emitter may zero cmdbuf.nbox as it goes. */
			if (radeon_emit_packet3_cliprect( dev, filp_priv, &cmdbuf, orig_nbox )) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				return DRM_ERR(EINVAL);
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				return DRM_ERR(EINVAL);
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait( dev, header.wait.flags )) {
				DRM_ERROR("radeon_emit_wait failed\n");
				return DRM_ERR(EINVAL);
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %d at %p\n", 
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			return DRM_ERR(EINVAL);
		}
	}


	DRM_DEBUG("DONE\n");
	/* Make everything emitted above visible to the CP in one go. */
	COMMIT_RING();
	return 0;
}
2443
2444
2445
2446 int radeon_cp_getparam( DRM_IOCTL_ARGS )
2447 {
2448         DRM_DEVICE;
2449         drm_radeon_private_t *dev_priv = dev->dev_private;
2450         drm_radeon_getparam_t param;
2451         int value;
2452
2453         if ( !dev_priv ) {
2454                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2455                 return DRM_ERR(EINVAL);
2456         }
2457
2458         DRM_COPY_FROM_USER_IOCTL( param, (drm_radeon_getparam_t *)data,
2459                              sizeof(param) );
2460
2461         DRM_DEBUG( "pid=%d\n", DRM_CURRENTPID );
2462
2463         switch( param.param ) {
2464         case RADEON_PARAM_GART_BUFFER_OFFSET:
2465                 value = dev_priv->gart_buffers_offset;
2466                 break;
2467         case RADEON_PARAM_LAST_FRAME:
2468                 dev_priv->stats.last_frame_reads++;
2469                 value = GET_SCRATCH( 0 );
2470                 break;
2471         case RADEON_PARAM_LAST_DISPATCH:
2472                 value = GET_SCRATCH( 1 );
2473                 break;
2474         case RADEON_PARAM_LAST_CLEAR:
2475                 dev_priv->stats.last_clear_reads++;
2476                 value = GET_SCRATCH( 2 );
2477                 break;
2478         case RADEON_PARAM_IRQ_NR:
2479                 value = dev->irq;
2480                 break;
2481         case RADEON_PARAM_GART_BASE:
2482                 value = dev_priv->gart_vm_start;
2483                 break;
2484         case RADEON_PARAM_REGISTER_HANDLE:
2485                 value = dev_priv->mmio_offset;
2486                 break;
2487         case RADEON_PARAM_STATUS_HANDLE:
2488                 value = dev_priv->ring_rptr_offset;
2489                 break;
2490 #if BITS_PER_LONG == 32
2491         /*
2492          * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2493          * pointer which can't fit into an int-sized variable.  According to
2494          * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2495          * not supporting it shouldn't be a problem.  If the same functionality
2496          * is needed on 64-bit platforms, a new ioctl() would have to be added,
2497          * so backwards-compatibility for the embedded platforms can be
2498          * maintained.  --davidm 4-Feb-2004.
2499          */
2500         case RADEON_PARAM_SAREA_HANDLE:
2501                 /* The lock is the first dword in the sarea. */
2502                 value = (long)dev->lock.hw_lock;
2503                 break;
2504 #endif
2505         case RADEON_PARAM_GART_TEX_HANDLE:
2506                 value = dev_priv->gart_textures_offset;
2507                 break;
2508         default:
2509                 return DRM_ERR(EINVAL);
2510         }
2511
2512         if ( DRM_COPY_TO_USER( param.value, &value, sizeof(int) ) ) {
2513                 DRM_ERROR( "copy_to_user\n" );
2514                 return DRM_ERR(EFAULT);
2515         }
2516         
2517         return 0;
2518 }
2519
2520 int radeon_cp_setparam( DRM_IOCTL_ARGS ) {
2521         DRM_DEVICE;
2522         drm_radeon_private_t *dev_priv = dev->dev_private;
2523         drm_file_t *filp_priv;
2524         drm_radeon_setparam_t sp;
2525
2526         if ( !dev_priv ) {
2527                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2528                 return DRM_ERR( EINVAL );
2529         }
2530
2531         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2532
2533         DRM_COPY_FROM_USER_IOCTL( sp, ( drm_radeon_setparam_t* )data,
2534                                   sizeof( sp ) );
2535
2536         switch( sp.param ) {
2537         case RADEON_SETPARAM_FB_LOCATION:
2538                 filp_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
2539                 break;
2540         default:
2541                 DRM_DEBUG( "Invalid parameter %d\n", sp.param );
2542                 return DRM_ERR( EINVAL );
2543         }
2544
2545         return 0;
2546 }