/*
 * TUX - Integrated Application Protocols Layer and Object Cache
 *
 * Copyright (C) 2000, 2001, Ingo Molnar <mingo@redhat.com>
 *
 * input.c: handle requests arriving on accepted connections
 */

#include <net/tux.h>
#include <linux/kmod.h>

/****************************************************************
 *      This program is free software; you can redistribute it and/or modify
 *      it under the terms of the GNU General Public License as published by
 *      the Free Software Foundation; either version 2, or (at your option)
 *      any later version.
 *
 *      This program is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *      GNU General Public License for more details.
 *
 *      You should have received a copy of the GNU General Public License
 *      along with this program; if not, write to the Free Software
 *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 ****************************************************************/

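/*
 * Abort a request that carries a pending error. Connection timeouts
 * are handed to the protocol's request_timeout() callback when one
 * exists; forced closes are flushed straight away when possible (or
 * via a queued flush_request atom from cachemiss context); any other
 * error is routed to redirect_request() so the secondary server can
 * answer instead.
 */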
void zap_request (tux_req_t *req, int cachemiss)
{
        if (!req->error)
                TUX_BUG();
        if (req->error == TUX_ERROR_CONN_TIMEOUT) {
                if (req->proto->request_timeout) {
                        clear_keepalive(req);
                        req->proto->request_timeout(req, cachemiss);
                } else {
                        clear_keepalive(req);
                        if (!cachemiss)
                                flush_request(req, 0);
                        else {
                                add_tux_atom(req, flush_request);
                                add_req_to_workqueue(req);
                        }
                }
                return;
        }

        if (!cachemiss && (req->error == TUX_ERROR_CONN_CLOSE)) {
                /*
                 * Zap connection as fast as possible, there is
                 * no valid client connection anymore:
                 */
                clear_keepalive(req);
                flush_request(req, 0);
        } else {
                if (req->error == TUX_ERROR_CONN_CLOSE) {
                        clear_keepalive(req);
                        add_tux_atom(req, flush_request);
                } else
                        /*
                         * Potentially redirect to the secondary server:
                         */
                        add_tux_atom(req, redirect_request);
                add_req_to_workqueue(req);
        }
}

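/*
 * Re-point the current thread's fs root at the request's docroot,
 * chroot-style, so that subsequent path lookups resolve inside the
 * virtual server's document root.
 */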
void __switch_docroot(tux_req_t *req)
{
        if (!req->docroot_dentry || !req->docroot_mnt)
                TUX_BUG();
        set_fs_root(current->fs, req->docroot_mnt, req->docroot_dentry);
}

struct dentry * __tux_lookup (tux_req_t *req, const char *filename,
                         struct nameidata *base, struct vfsmount **mnt)
{
        int err;

        err = path_walk(filename, base);
        if (err) {
                Dprintk("path_walk() returned with %d!\n", err);
                return ERR_PTR(err);
        }
        if (*mnt)
                TUX_BUG();
        *mnt = base->mnt;

        return base->dentry;
}

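/*
 * Access policy: an object is served only if its mode carries none
 * of the tux_mode_forbidden bits, at least one tux_mode_allowed bit,
 * and the generic permission() check grants MAY_READ.
 */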
int tux_permission (struct inode *inode)
{
        umode_t mode;
        int err;

        mode = inode->i_mode;
        Dprintk("URL inode mode: %08x.\n", mode);

        if (mode & tux_mode_forbidden)
                return -2;
        /*
         * at least one bit in the 'allowed' set has to
         * be present to allow access.
         */
        if (!(mode & tux_mode_allowed))
                return -3;
        err = permission(inode, MAY_READ, NULL);
        return err;
}

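/*
 * Look up an object name, either absolute (resolved against the
 * docroot) or relative to the per-request cwd, taking temporary
 * references on the base dentry/vfsmount that path_walk() then
 * consumes during the walk.
 */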
struct dentry * tux_lookup (tux_req_t *req, const char *filename,
                        const unsigned int flag, struct vfsmount **mnt)
{
        struct dentry *dentry;
        struct nameidata base = {};

        Dprintk("tux_lookup(%p, %s, %d, virtual: %d, host: %s (%d).)\n", req, filename, flag, req->virtual, req->host, req->host_len);

        base.flags = LOOKUP_FOLLOW|flag;
        base.last_type = LAST_ROOT;
        if (req->objectname[0] == '/') {
                base.dentry = dget(req->docroot_dentry);
                base.mnt = mntget(req->docroot_mnt);
        } else {
                if (!req->cwd_dentry) {
                        req->cwd_dentry = dget(req->docroot_dentry);
                        req->cwd_mnt = mntget(req->docroot_mnt);
                }
                base.dentry = req->cwd_dentry;
                dget(base.dentry);
                base.mnt = mntget(req->cwd_mnt);
        }

        switch_docroot(req);
        dentry = __tux_lookup (req, filename, &base, mnt);

        Dprintk("looked up {%s} == dentry %p.\n", filename, dentry);

        if (dentry && !IS_ERR(dentry) && !dentry->d_inode)
                TUX_BUG();
        return dentry;
}

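/*
 * Resolve req->objectname and attach the result to the request.
 * Returns 0 on success or on a hard failure (the error is recorded
 * in the request via req_err()), and 1 on -EWOULDBLOCKIO, i.e. a
 * cache miss that must be retried from a blocking context.
 */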
int lookup_object (tux_req_t *req, const unsigned int flag)
{
        struct vfsmount *mnt = NULL;
        struct dentry *dentry = NULL;
        int perm;

        dentry = tux_lookup(req, req->objectname, flag, &mnt);
        if (!dentry || IS_ERR(dentry)) {
                if (PTR_ERR(dentry) == -EWOULDBLOCKIO)
                        goto cachemiss;
                goto abort;
        }
        perm = tux_permission(dentry->d_inode);
        /*
         * Only regular files allowed.
         */
        if ((perm < 0) || !S_ISREG(dentry->d_inode->i_mode)) {
                req->status = 403;
                goto abort;
        }
        req->total_file_len = dentry->d_inode->i_size;
out:
        install_req_dentry(req, dentry, mnt);
        return 0;
cachemiss:
        return 1;
abort:
        if (dentry) {
                if (!IS_ERR(dentry))
                        dput(dentry);
                dentry = NULL;
        }
        if (mnt) {
                if (!IS_ERR(mnt))
                        mntput(mnt);
                mnt = NULL;
        }
        req_err(req);
        goto out;
}

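/*
 * Attach a looked-up dentry/vfsmount pair to the request and open
 * the file for reading; dentry_open() takes over the extra dget()ed
 * reference, keeping install/release balanced.
 */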
void install_req_dentry (tux_req_t *req, struct dentry *dentry, struct vfsmount *mnt)
{
        if (req->dentry)
                TUX_BUG();
        req->dentry = dentry;
        if (req->mnt)
                TUX_BUG();
        req->mnt = mnt;
        if (req->in_file && req->in_file->f_dentry)
                TUX_BUG();
        if (dentry)
                req->in_file = dentry_open(dget(dentry), NULL, O_RDONLY);
}

void release_req_dentry (tux_req_t *req)
{
        if (!req->dentry) {
                if (req->in_file && req->in_file->f_dentry)
                        TUX_BUG();
                return;
        }

        fput(req->in_file);
        req->in_file = NULL;
        dput(req->dentry);
        req->dentry = NULL;
        mntput(req->mnt);
        req->mnt = NULL;
}

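/*
 * Output-bandwidth throttling check, integer-only:
 *
 *      curr_bw = bytes_sent_total * HZ / jiffies_elapsed
 *
 * e.g. (assuming HZ=1000) 65536 bytes over 50 jiffies gives
 * 65536*1000/50 = 1310720 bytes/sec. Returns nonzero if the
 * connection exceeds tux_max_output_bandwidth (or has sent
 * nothing yet).
 */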
int __connection_too_fast (tux_req_t *req)
{
        unsigned long curr_bw, delta, bytes;

        bytes = req->total_bytes + req->bytes_sent;
        if (!bytes)
                return 1;

        delta = jiffies - req->first_timestamp;
        if (!delta)
                delta++;
        curr_bw = bytes * HZ / delta;

        if (curr_bw > tux_max_output_bandwidth)
                return 2;
        return 0;
}

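/*
 * Take a request out of idle-input state, under ti->work_lock. If
 * the idle bit was already clear the request must be pending on the
 * work queue, so it is unlinked from there instead; if it was idle,
 * the keepalive timer is stopped as well.
 */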
void unidle_req (tux_req_t *req)
{
        threadinfo_t *ti = req->ti;

        Dprintk("UNIDLE req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), req->sock, req->sock->sk, req->keep_alive, req->status);
        spin_lock_irq(&ti->work_lock);
        if (req->magic != TUX_MAGIC)
                TUX_BUG();
        if (!test_and_clear_bit(0, &req->idle_input)) {
                Dprintk("unidling %p, wasn't idle!\n", req);
                if (list_empty(&req->work))
                        TUX_BUG();
                list_del(&req->work);
                DEBUG_DEL_LIST(&req->work);
                DEC_STAT(nr_work_pending);
        } else {
                del_keepalive_timer(req);
                DEC_STAT(nr_idle_input_pending);
                Dprintk("unidled %p.\n", req);
        }
        if (req->idle_input)
                TUX_BUG();
        spin_unlock_irq(&ti->work_lock);
}

#define GOTO_INCOMPLETE do { Dprintk("incomplete at %s:%d.\n", __FILE__, __LINE__); goto incomplete; } while (0)
#define GOTO_REDIRECT do { TDprintk("redirect at %s:%d.\n", __FILE__, __LINE__); goto redirect; } while (0)
#define GOTO_REDIRECT_NONIDLE do { TDprintk("redirect at %s:%d.\n", __FILE__, __LINE__); goto redirect_nonidle; } while (0)

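/*
 * Nonblocking MSG_PEEK read of the incoming request into a kernel
 * buffer (under KERNEL_DS): the bytes stay queued on the socket
 * until trunc_headers() consumes the parsed portion with MSG_TRUNC.
 */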
static int read_request (struct socket *sock, char *buf, int max_size)
{
        mm_segment_t oldmm;
        struct kiocb iocb;
        struct msghdr msg;
        struct iovec iov;
        int len;

        msg.msg_name     = NULL;
        msg.msg_namelen  = 0;
        msg.msg_iov      = &iov;
        msg.msg_iovlen   = 1;
        msg.msg_control  = NULL;
        msg.msg_controllen = 0;
        msg.msg_flags    = 0;

        msg.msg_iov->iov_base = buf;
        msg.msg_iov->iov_len  = max_size;

        oldmm = get_fs(); set_fs(KERNEL_DS);

read_again:
        init_sync_kiocb(&iocb, NULL);
        len = sock->sk->sk_prot->recvmsg(&iocb, sock->sk, &msg, max_size,
                                                MSG_DONTWAIT, MSG_PEEK, NULL);
        if (-EIOCBQUEUED == len)
                len = wait_on_sync_kiocb(&iocb);

        /*
         * We must not get a signal in between:
         */
        if ((len == -EAGAIN) || (len == -ERESTARTSYS)) {
                if (!signal_pending(current)) {
                        len = 0;
                        goto out;
                }
                flush_all_signals();
                goto read_again;
        }
out:
        set_fs(oldmm);
        return len;
}

/*
 * We inline URG data so it's at the head of the normal receive queue.
 */
static int zap_urg_data (struct socket *sock)
{
        mm_segment_t oldmm;
        struct msghdr msg;
        struct iovec iov;
        struct kiocb iocb;
        int len;
        char buf[10];

        oldmm = get_fs(); set_fs(KERNEL_DS);

        msg.msg_name            = NULL;
        msg.msg_namelen         = 0;
        msg.msg_iov             = &iov;
        msg.msg_iovlen          = 1;
        msg.msg_control         = NULL;
        msg.msg_controllen      = 0;
        msg.msg_flags           = 0;

        msg.msg_iov->iov_base = buf;
        msg.msg_iov->iov_len  = 2;

read_again:
        init_sync_kiocb(&iocb, NULL);
        len = sock->sk->sk_prot->recvmsg(&iocb, sock->sk, &msg, 2,
                                                MSG_DONTWAIT, 0, NULL);
        if (-EIOCBQUEUED == len)
                len = wait_on_sync_kiocb(&iocb);
        Dprintk("recvmsg(MSG_OOB) returned %d.\n", len);

        /*
         * We must not get a signal in between:
         */
        if ((len == -EAGAIN) || (len == -ERESTARTSYS)) {
                if (!signal_pending(current)) {
                        len = 0;
                        goto out;
                }
                flush_all_signals();
                goto read_again;
        }
out:
        set_fs(oldmm);

        Dprintk("in out:.. and will return %d.!\n", len);

        return len;
}

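/*
 * Consume req->parsed_len bytes from the socket with MSG_TRUNC: the
 * header data was only MSG_PEEKed by read_request(), so the parsed
 * part must be dropped from the receive queue once it has been
 * processed.
 */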
void trunc_headers (tux_req_t *req)
{
        struct sock *sk = req->sock->sk;
        int len, addr_len = 0;
        struct kiocb iocb;

        if (!req->parsed_len)
                TUX_BUG();
repeat_trunc:
        init_sync_kiocb(&iocb, NULL);
        len = sk->sk_prot->recvmsg(&iocb, sk, NULL, req->parsed_len, 1, MSG_TRUNC, &addr_len);
        if (-EIOCBQUEUED == len)
                len = wait_on_sync_kiocb(&iocb);
        if ((len == -ERESTARTSYS) || (len == -EAGAIN)) {
                flush_all_signals();
                goto repeat_trunc;
        }
        Dprintk("truncated (TRUNC) %d bytes at %p. (wanted: %d.)\n", len, __builtin_return_address(0), req->parsed_len);

        req->parsed_len = 0;
}

void print_req (tux_req_t *req)
{
        struct sock *sk;

        printk("PRINT req %p <%p>, sock %p\n",
                        req, __builtin_return_address(0), req->sock);
        printk("... idx: %d\n", req->atom_idx);
        if (req->sock) {
                sk = req->sock->sk;
                printk("... sock %p, sk %p, sk->state: %d, sk->err: %d\n", req->sock, sk, sk->sk_state, sk->sk_err);
                printk("... write_queue: %d, receive_queue: %d, error_queue: %d, keepalive: %d, status: %d\n", !skb_queue_empty(&sk->sk_write_queue), !skb_queue_empty(&sk->sk_receive_queue), !skb_queue_empty(&sk->sk_error_queue), req->keep_alive, req->status);
                printk("...tp->send_head: %p\n", sk->sk_send_head);
                printk("...tp->snd_una: %08x\n", tcp_sk(sk)->snd_una);
                printk("...tp->snd_nxt: %08x\n", tcp_sk(sk)->snd_nxt);
                printk("...tp->packets_out: %08x\n", tcp_sk(sk)->packets_out);
        }
        printk("... meth:{%s}, uri:{%s}, query:{%s}, ver:{%s}\n", req->method_str ? req->method_str : "<null>", req->uri_str ? req->uri_str : "<null>", req->query_str ? req->query_str : "<null>", req->version_str ? req->version_str : "<null>");
        printk("... post_data:{%s}(%d).\n", req->post_data_str, req->post_data_len);
        printk("... headers: {%s}\n", req->headers);
}

/*
 * parse_request() reads all available TCP/IP data and prepares
 * the request if the TUX request is complete. (We can get TUX
 * requests in several packets.) Invalid requests are redirected
 * to the secondary server.
 */

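/*
 * Control-flow sketch: mark the request idle and arm the keepalive
 * timer, drain any urgent data, MSG_PEEK the headers via
 * read_request(), then hand them to the protocol's parse_message().
 * A complete request is unidled and queued for work; an incomplete
 * one re-adds parse_request as an atom; a bad one is redirected (or
 * zapped outright if a hard error is set).
 */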
void parse_request (tux_req_t *req, int cachemiss)
{
        int len, parsed_len;
        struct sock *sk = req->sock->sk;
        struct tcp_sock *tp = tcp_sk(sk);
        int was_keepalive = req->keep_alive;

        if (req->magic != TUX_MAGIC)
                TUX_BUG();

        SET_TIMESTAMP(req->parse_timestamp);

        spin_lock_irq(&req->ti->work_lock);
        add_keepalive_timer(req);
        if (test_and_set_bit(0, &req->idle_input))
                TUX_BUG();
        INC_STAT(nr_idle_input_pending);
        spin_unlock_irq(&req->ti->work_lock);

        Dprintk("idled request %p.\n", req);

restart:

        if (tp->urg_data && !(tp->urg_data & TCP_URG_READ)) {
                len = zap_urg_data(req->sock);
                if (tp->urg_data && !(tp->urg_data & TCP_URG_READ)) {
                        req->error = TUX_ERROR_CONN_CLOSE;
                        goto redirect_error;
                }
        }

        INC_STAT(input_slowpath);

        if (!req->headers)
                req->headers = tux_kmalloc(tux_max_header_len);

        /* First, read the data: */
        len = read_request(req->sock, (char *)req->headers, tux_max_header_len-1);
        if (len < 0) {
                req->error = TUX_ERROR_CONN_CLOSE;
                goto redirect_error;
        }
        if (!len)
                GOTO_INCOMPLETE;

        /*
         * Make it a zero-terminated string to automatically get
         * protection against various buffer overflow situations.
         * Then pass it to the TUX application protocol stack.
         */
        ((char *)req->headers)[len] = 0;
        req->headers_len = len;

        parsed_len = req->proto->parse_message(req, len);

        /*
         * Is the request fully read? (Or was there an error?)
         */
        if (parsed_len < 0)
                GOTO_REDIRECT;
        if (!parsed_len) {
                /*
                 * Push the pending ACK which was delayed due to the
                 * pingpong optimization:
                 */
                if (was_keepalive) {
                        lock_sock(sk);
                        tp->ack.pingpong = 0;
                        tp->ack.pending |= TCP_ACK_PUSHED;
                        cleanup_rbuf(sk, 1);
                        release_sock(sk);
                }
                if (len >= tux_max_header_len-1)
                        GOTO_REDIRECT;
                GOTO_INCOMPLETE;
        }
        unidle_req(req);

        tp->nonagle = 2;

        add_req_to_workqueue(req);
        return;

redirect:
        TDprintk("req %p will be redirected!\n", req);
        req_err(req);

redirect_error:
        unidle_req(req);

        if (len < 0)
                req->parsed_len = 0;
        else
                req->parsed_len = len;

        INC_STAT(parse_static_redirect);
        if (req->headers)
                kfree(req->headers);
        req->headers = NULL;
        if (req->error)
                zap_request(req, cachemiss);
        return;

incomplete:
        if (req->error)
                goto redirect_error;
        if (tp->urg_data && !(tp->urg_data & TCP_URG_READ))
                goto restart;

        add_tux_atom(req, parse_request);
        INC_STAT(parse_static_incomplete);
        tux_push_req(req);
}

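/*
 * Drain ti->work_pending: each request either returns to user space
 * (atom index 0 with req->usermode set, reported via *user_req), is
 * flushed (atom index 0), or has its next atom scheduled. Returns
 * the number of requests processed.
 */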
int process_requests (threadinfo_t *ti, tux_req_t **user_req)
{
        struct list_head *head, *curr;
        int count = 0;
        tux_req_t *req;

        *user_req = NULL;

restart_loop:
        spin_lock_irq(&ti->work_lock);
        head = &ti->work_pending;
        curr = head->next;

        if (curr != head) {
                int i;

                req = list_entry(curr, tux_req_t, work);
                Dprintk("PROCESS req %p <%p>.\n",
                        req, __builtin_return_address(0));
                for (i = 0; i < req->atom_idx; i++)
                        Dprintk("... atom %d: %p\n", i, req->atoms[i]);

                if (req->ti != ti)
                        TUX_BUG();
                if (req->magic != TUX_MAGIC)
                        TUX_BUG();

                if (list_empty(&req->work))
                        TUX_BUG();
                list_del(curr);
                DEBUG_DEL_LIST(&req->work);
                spin_unlock_irq(&ti->work_lock);

                if (!req->atom_idx) {
                        if (req->usermode) {
                                *user_req = req;
                                return count;
                        }
                        /*
                         * idx == 0 requests are flushed automatically.
                         */
                        flush_request(req, 0);
                } else
                        tux_schedule_atom(req, 0);
                count++;
                goto restart_loop;
        }
        spin_unlock_irq(&ti->work_lock);

        return count;
}

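/*
 * Forcibly flush every request on the work queue: clear its idle
 * and output-space bits, reset its state and keepalive, and push it
 * through flush_request(), emptying the queue wholesale.
 */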
int tux_flush_workqueue (threadinfo_t *ti)
{
        struct list_head *head, *curr, *next;
        tux_req_t *req;
        int count = 0;

restart:
        spin_lock_irq(&ti->work_lock);
        head = &ti->work_pending;
        curr = head->next;

        if (curr != head) {
                req = list_entry(curr, tux_req_t, work);
                next = curr->next;
                clear_bit(0, &req->idle_input);
                clear_bit(0, &req->wait_output_space);
                if (list_empty(&req->work))
                        TUX_BUG();
                list_del(curr);
                DEBUG_DEL_LIST(curr);
                DEC_STAT(nr_input_pending);
                spin_unlock_irq(&ti->work_lock);
#if CONFIG_TUX_DEBUG
                req->bytes_expected = 0;
#endif
                req->in_file->f_pos = 0;
                req->atom_idx = 0;
                clear_keepalive(req);
                req->status = -1;
                if (req->usermode) {
                        req->usermode = 0;
                        req->private = 0;
                }
                flush_request(req, 0);
                count++;
                goto restart;
        }
        spin_unlock_irq(&ti->work_lock);

        return count;
}

int print_all_requests (threadinfo_t *ti)
{
        struct list_head *head, *curr;
        tux_req_t *req;
        int count = 0;

        spin_lock_irq(&ti->work_lock);
        head = &ti->all_requests;
        curr = head->next;

        while (curr != head) {
                req = list_entry(curr, tux_req_t, all);
                curr = curr->next;
                print_req(req);
                count++;
        }
        spin_unlock_irq(&ti->work_lock);

        return count;
}