/*
 * TUX - Integrated Application Protocols Layer and Object Cache
 *
 * Copyright (C) 2000, 2001, Ingo Molnar <mingo@redhat.com>
 *
 * input.c: handle requests arriving on accepted connections
 */

#include <net/tux.h>
#include <linux/kmod.h>

/****************************************************************
 *      This program is free software; you can redistribute it and/or modify
 *      it under the terms of the GNU General Public License as published by
 *      the Free Software Foundation; either version 2, or (at your option)
 *      any later version.
 *
 *      This program is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *      GNU General Public License for more details.
 *
 *      You should have received a copy of the GNU General Public License
 *      along with this program; if not, write to the Free Software
 *      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 ****************************************************************/

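/*
 * zap_request(): shut down a request that has req->error set.
 * Connection timeouts are handed to the protocol's ->request_timeout()
 * handler if there is one; otherwise the request is either flushed
 * directly or queued for the main loop, depending on whether we are
 * in cachemiss context. Other errors are queued for redirection to
 * the secondary server unless the client connection is already gone.
 */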
void zap_request (tux_req_t *req, int cachemiss)
{
        if (!req->error)
                TUX_BUG();
        if (req->error == TUX_ERROR_CONN_TIMEOUT) {
                if (req->proto->request_timeout) {
                        clear_keepalive(req);
                        req->proto->request_timeout(req, cachemiss);
                } else {
                        clear_keepalive(req);
                        if (!cachemiss)
                                flush_request(req, 0);
                        else {
                                add_tux_atom(req, flush_request);
                                add_req_to_workqueue(req);
                        }
                }
                return;
        }

        if (!cachemiss && (req->error == TUX_ERROR_CONN_CLOSE)) {
                /*
                 * Zap connection as fast as possible, there is
                 * no valid client connection anymore:
                 */
                clear_keepalive(req);
                flush_request(req, 0);
        } else {
                if (req->error == TUX_ERROR_CONN_CLOSE) {
                        clear_keepalive(req);
                        add_tux_atom(req, flush_request);
                } else
                        /*
                         * Potentially redirect to the secondary server:
                         */
                        add_tux_atom(req, redirect_request);
                add_req_to_workqueue(req);
        }
}

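/*
 * __switch_docroot(): set the current thread's fs root to the
 * request's document root, so that subsequent lookups are relative
 * to (and confined to) the virtual server's docroot.
 */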
void __switch_docroot(tux_req_t *req)
{
        if (!req->docroot_dentry || !req->docroot_mnt)
                TUX_BUG();
        set_fs_root(current->fs, req->docroot_mnt, req->docroot_dentry);
}

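/*
 * __tux_lookup(): walk 'filename' starting from the nameidata in
 * 'base' and return the resulting dentry; the vfsmount of the walk
 * is passed back via *mnt.
 */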
struct dentry * __tux_lookup (tux_req_t *req, const char *filename,
                         struct nameidata *base, struct vfsmount **mnt)
{
        int err;

        err = path_walk(filename, base);
        if (err) {
                Dprintk("path_walk() returned with %d!\n", err);
                return ERR_PTR(err);
        }
        if (*mnt)
                TUX_BUG();
        *mnt = base->mnt;

        return base->dentry;
}

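/*
 * tux_permission(): check whether TUX may serve this inode at all.
 * The mode must not contain any of the tux_mode_forbidden bits and
 * must contain at least one of the tux_mode_allowed bits; finally
 * the generic VFS read-permission check has to pass as well.
 */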
int tux_permission (struct inode *inode)
{
        umode_t mode;
        int err;

        mode = inode->i_mode;
        Dprintk("URL inode mode: %08x.\n", mode);

        if (mode & tux_mode_forbidden)
                return -2;
        /*
         * at least one bit in the 'allowed' set has to
         * be present to allow access.
         */
        if (!(mode & tux_mode_allowed))
                return -3;
        err = permission(inode, MAY_READ, NULL);
        return err;
}

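/*
 * tux_lookup(): resolve 'filename' for this request. Absolute object
 * names are looked up relative to the request's docroot, relative
 * names relative to the request's current working directory (which
 * defaults to the docroot). Returns the dentry and fills in *mnt.
 */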
struct dentry * tux_lookup (tux_req_t *req, const char *filename,
                        const unsigned int flag, struct vfsmount **mnt)
{
        struct dentry *dentry;
        struct nameidata base = { };

        Dprintk("tux_lookup(%p, %s, %d, virtual: %d, host: %s (%d).)\n", req, filename, flag, req->virtual, req->host, req->host_len);

        base.flags = LOOKUP_FOLLOW|flag;
        base.last_type = LAST_ROOT;
        if (req->objectname[0] == '/') {
                base.dentry = dget(req->docroot_dentry);
                base.mnt = mntget(req->docroot_mnt);
        } else {
                if (!req->cwd_dentry) {
                        req->cwd_dentry = dget(req->docroot_dentry);
                        req->cwd_mnt = mntget(req->docroot_mnt);
                }
                base.dentry = req->cwd_dentry;
                dget(base.dentry);
                base.mnt = mntget(req->cwd_mnt);
        }

        switch_docroot(req);
        dentry = __tux_lookup (req, filename, &base, mnt);

        Dprintk("looked up {%s} == dentry %p.\n", filename, dentry);

        if (dentry && !IS_ERR(dentry) && !dentry->d_inode)
                TUX_BUG();
        return dentry;
}

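/*
 * lookup_object(): look up req->objectname and attach the resulting
 * dentry/vfsmount to the request. Returns 0 on success (or on a
 * handled error) and 1 if the lookup would block (-EWOULDBLOCKIO,
 * cachemiss). Non-regular files and files failing the permission
 * check are rejected with a 403 status.
 */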
int lookup_object (tux_req_t *req, const unsigned int flag)
{
        struct vfsmount *mnt = NULL;
        struct dentry *dentry = NULL;
        int perm;

        dentry = tux_lookup(req, req->objectname, flag, &mnt);
        if (!dentry || IS_ERR(dentry)) {
                if (PTR_ERR(dentry) == -EWOULDBLOCKIO)
                        goto cachemiss;
                goto abort;
        }
        perm = tux_permission(dentry->d_inode);
        /*
         * Only regular files allowed.
         */
        if ((perm < 0) || !S_ISREG(dentry->d_inode->i_mode)) {
                req->status = 403;
                goto abort;
        }
        req->total_file_len = dentry->d_inode->i_size;
out:
        install_req_dentry(req, dentry, mnt);
        return 0;
cachemiss:
        return 1;
abort:
        if (dentry) {
                if (!IS_ERR(dentry))
                        dput(dentry);
                dentry = NULL;
        }
        if (mnt) {
                if (!IS_ERR(mnt))
                        mntput(mnt);
                mnt = NULL;
        }
        req_err(req);
        goto out;
}

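/*
 * install_req_dentry(): attach a dentry/vfsmount pair to the request
 * and open the file for reading into req->in_file.
 */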
void install_req_dentry (tux_req_t *req, struct dentry *dentry, struct vfsmount *mnt)
{
        if (req->dentry)
                TUX_BUG();
        req->dentry = dentry;
        if (req->mnt)
                TUX_BUG();
        req->mnt = mnt;
        if (req->in_file && req->in_file->f_dentry)
                TUX_BUG();
        if (dentry)
                req->in_file = dentry_open(dget(dentry), NULL, O_RDONLY);
}

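/*
 * release_req_dentry(): drop the file, dentry and vfsmount references
 * taken by install_req_dentry().
 */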
void release_req_dentry (tux_req_t *req)
{
        if (!req->dentry) {
                if (req->in_file && req->in_file->f_dentry)
                        TUX_BUG();
                return;
        }

        fput(req->in_file);
        req->in_file = NULL;
        dput(req->dentry);
        req->dentry = NULL;
        mntput(req->mnt);
        req->mnt = NULL;
}

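/*
 * __connection_too_fast(): rough output-bandwidth limiter. Compute the
 * connection's current bandwidth (bytes * HZ / jiffies elapsed) and
 * compare it against tux_max_output_bandwidth. Returns 1 if nothing
 * has been sent yet, 2 if the connection is over the limit, 0
 * otherwise.
 */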
int __connection_too_fast (tux_req_t *req)
{
        unsigned long curr_bw, delta, bytes;

        bytes = req->total_bytes + req->bytes_sent;
        if (!bytes)
                return 1;

        delta = jiffies - req->first_timestamp;
        if (!delta)
                delta++;
        curr_bw = bytes * HZ / delta;

        if (curr_bw > tux_max_output_bandwidth)
                return 2;
        return 0;
}

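/*
 * unidle_req(): take a request out of the idle-input state. If it is
 * no longer marked idle it must already be on the work-pending list,
 * so remove it from there instead; otherwise drop the keepalive timer
 * and the idle-input statistics.
 */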
void unidle_req (tux_req_t *req)
{
        threadinfo_t *ti = req->ti;

        Dprintk("UNIDLE req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), req->sock, req->sock->sk, req->keep_alive, req->status);
        spin_lock_irq(&ti->work_lock);
        if (req->magic != TUX_MAGIC)
                TUX_BUG();
        if (!test_and_clear_bit(0, &req->idle_input)) {
                Dprintk("unidling %p, wasn't idle!\n", req);
                if (list_empty(&req->work))
                        TUX_BUG();
                list_del(&req->work);
                DEBUG_DEL_LIST(&req->work);
                DEC_STAT(nr_work_pending);
        } else {
                del_keepalive_timer(req);
                DEC_STAT(nr_idle_input_pending);
                Dprintk("unidled %p.\n", req);
        }
        if (req->idle_input)
                TUX_BUG();
        spin_unlock_irq(&ti->work_lock);
}

#define GOTO_INCOMPLETE do { Dprintk("incomplete at %s:%d.\n", __FILE__, __LINE__); goto incomplete; } while (0)
#define GOTO_REDIRECT do { TDprintk("redirect at %s:%d.\n", __FILE__, __LINE__); goto redirect; } while (0)
#define GOTO_REDIRECT_NONIDLE do { TDprintk("redirect at %s:%d.\n", __FILE__, __LINE__); goto redirect_nonidle; } while (0)

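/*
 * read_request(): peek up to max_size bytes of pending input from the
 * socket into 'buf' (MSG_PEEK, non-blocking), flushing signals and
 * retrying if interrupted. Returns the number of bytes peeked, 0 if
 * nothing is available yet, or a negative error code.
 */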
static int read_request (struct socket *sock, char *buf, int max_size)
{
        mm_segment_t oldmm;
        struct kiocb iocb;
        struct msghdr msg;
        struct iovec iov;

        int len;

        msg.msg_name     = NULL;
        msg.msg_namelen  = 0;
        msg.msg_iov      = &iov;
        msg.msg_iovlen   = 1;
        msg.msg_control  = NULL;
        msg.msg_controllen = 0;
        msg.msg_flags    = 0;

        msg.msg_iov->iov_base = buf;
        msg.msg_iov->iov_len  = max_size;

        oldmm = get_fs(); set_fs(KERNEL_DS);

read_again:
        init_sync_kiocb(&iocb, NULL);
        len = sock->sk->sk_prot->recvmsg(&iocb, sock->sk, &msg, max_size,
                                                MSG_DONTWAIT, MSG_PEEK, NULL);
        if (-EIOCBQUEUED == len)
                len = wait_on_sync_kiocb(&iocb);

        /*
         * We must not get a signal in between
         */
        if ((len == -EAGAIN) || (len == -ERESTARTSYS)) {
                if (!signal_pending(current)) {
                        len = 0;
                        goto out;
                }
                flush_all_signals();
                goto read_again;
        }
out:
        set_fs(oldmm);
        return len;
}

/*
 * We inline URG data so it's at the head of the normal receive queue.
 */
static int zap_urg_data (struct socket *sock)
{
        mm_segment_t oldmm;
        struct msghdr msg;
        struct iovec iov;
        struct kiocb iocb;
        int len;
        char buf[10];

        oldmm = get_fs(); set_fs(KERNEL_DS);

        msg.msg_name            = NULL;
        msg.msg_namelen         = 0;
        msg.msg_iov             = &iov;
        msg.msg_iovlen          = 1;
        msg.msg_control         = NULL;
        msg.msg_controllen      = 0;
        msg.msg_flags           = 0;

        msg.msg_iov->iov_base = buf;
        msg.msg_iov->iov_len  = 2;

read_again:
        init_sync_kiocb(&iocb, NULL);
        len = sock->sk->sk_prot->recvmsg(&iocb, sock->sk, &msg, 2,
                                                MSG_DONTWAIT, 0, NULL);
        if (-EIOCBQUEUED == len)
                len = wait_on_sync_kiocb(&iocb);
        Dprintk("recvmsg(MSG_OOB) returned %d.\n", len);

        /*
         * We must not get a signal in between
         */
        if ((len == -EAGAIN) || (len == -ERESTARTSYS)) {
                if (!signal_pending(current)) {
                        len = 0;
                        goto out;
                }
                flush_all_signals();
                goto read_again;
        }
out:
        set_fs(oldmm);

        Dprintk("in out:.. and will return %d.!\n", len);

        return len;
}

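/*
 * trunc_headers(): discard the already-parsed header bytes from the
 * socket receive queue (MSG_TRUNC read of req->parsed_len bytes),
 * retrying across signals, and reset req->parsed_len.
 */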
void trunc_headers (tux_req_t *req)
{
        struct sock *sk = req->sock->sk;
        int len, addr_len = 0;
        struct kiocb iocb;

        if (!req->parsed_len)
                TUX_BUG();
repeat_trunc:
        init_sync_kiocb(&iocb, NULL);
        len = sk->sk_prot->recvmsg(&iocb, sk, NULL, req->parsed_len, 1, MSG_TRUNC, &addr_len);
        if (-EIOCBQUEUED == len)
                len = wait_on_sync_kiocb(&iocb);
        if ((len == -ERESTARTSYS) || (len == -EAGAIN)) {
                flush_all_signals();
                goto repeat_trunc;
        }
        Dprintk("truncated (TRUNC) %d bytes at %p. (wanted: %d.)\n", len, __builtin_return_address(0), req->parsed_len);

        req->parsed_len = 0;
}

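/*
 * print_req(): dump the state of a request (socket, TCP state and
 * parsed request strings) to the kernel log, for debugging.
 */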
void print_req (tux_req_t *req)
{
        struct sock *sk;

        printk("PRINT req %p <%p>, sock %p\n",
                        req, __builtin_return_address(0), req->sock);
        printk("... idx: %d\n", req->atom_idx);
        if (req->sock) {
                sk = req->sock->sk;
                printk("... sock %p, sk %p, sk->state: %d, sk->err: %d\n", req->sock, sk, sk->sk_state, sk->sk_err);
                printk("... write_queue: %d, receive_queue: %d, error_queue: %d, keepalive: %d, status: %d\n", !skb_queue_empty(&sk->sk_write_queue), !skb_queue_empty(&sk->sk_receive_queue), !skb_queue_empty(&sk->sk_error_queue), req->keep_alive, req->status);
                printk("...tp->send_head: %p\n", sk->sk_send_head);
                printk("...tp->snd_una: %08x\n", tcp_sk(sk)->snd_una);
                printk("...tp->snd_nxt: %08x\n", tcp_sk(sk)->snd_nxt);
                printk("...tp->packets_out: %08x\n", tcp_sk(sk)->packets_out);
        }
        printk("... meth:{%s}, uri:{%s}, query:{%s}, ver:{%s}\n", req->method_str ? req->method_str : "<null>", req->uri_str ? req->uri_str : "<null>", req->query_str ? req->query_str : "<null>", req->version_str ? req->version_str : "<null>");
        printk("... post_data:{%s}(%d).\n", req->post_data_str, req->post_data_len);
        printk("... headers: {%s}\n", req->headers);
}

/*
 * parse_request() reads all available TCP/IP data and prepares
 * the request if the TUX request is complete. (A TUX request can
 * arrive split across several packets.) Invalid requests are
 * redirected to the secondary server.
 */

void parse_request (tux_req_t *req, int cachemiss)
{
        int len, parsed_len;
        struct sock *sk = req->sock->sk;
        struct tcp_sock *tp = tcp_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        int was_keepalive = req->keep_alive;

        if (req->magic != TUX_MAGIC)
                TUX_BUG();

        SET_TIMESTAMP(req->parse_timestamp);

        spin_lock_irq(&req->ti->work_lock);
        add_keepalive_timer(req);
        if (test_and_set_bit(0, &req->idle_input))
                TUX_BUG();
        INC_STAT(nr_idle_input_pending);
        spin_unlock_irq(&req->ti->work_lock);

        Dprintk("idled request %p.\n", req);

restart:

        if (tp->urg_data && !(tp->urg_data & TCP_URG_READ)) {
                len = zap_urg_data(req->sock);
                if (tp->urg_data && !(tp->urg_data & TCP_URG_READ)) {
                        req->error = TUX_ERROR_CONN_CLOSE;
                        goto redirect_error;
                }
        }

        INC_STAT(input_slowpath);

        if (!req->headers)
                req->headers = tux_kmalloc(tux_max_header_len);

        /* First, read the data */
        len = read_request(req->sock, (char *)req->headers, tux_max_header_len-1);
        if (len < 0) {
                req->error = TUX_ERROR_CONN_CLOSE;
                goto redirect_error;
        }
        if (!len)
                GOTO_INCOMPLETE;

        /*
         * Make it a NUL-terminated string to automatically get
         * protection against various buffer overflow situations.
         * Then pass it to the TUX application protocol stack.
         */
        ((char *)req->headers)[len] = 0;
        req->headers_len = len;

        parsed_len = req->proto->parse_message(req, len);

        /*
         * Is the request fully read? (or is there any error)
         */
        if (parsed_len < 0)
                GOTO_REDIRECT;
        if (!parsed_len) {
                /*
                 * Push pending ACK which was delayed due to the
                 * pingpong optimization:
                 */
                if (was_keepalive) {
                        lock_sock(sk);
                        icsk->icsk_ack.pingpong = 0;
                        icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
                        tcp_cleanup_rbuf(sk, 1);
                        release_sock(sk);
                }
                if (len >= tux_max_header_len-1)
                        GOTO_REDIRECT;
                GOTO_INCOMPLETE;
        }
        unidle_req(req);

        tp->nonagle = 2;

        add_req_to_workqueue(req);
        return;

redirect:
        TDprintk("req %p will be redirected!\n", req);
        req_err(req);

redirect_error:
        unidle_req(req);

        if (len < 0)
                req->parsed_len = 0;
        else
                req->parsed_len = len;

        INC_STAT(parse_static_redirect);
        if (req->headers)
                kfree(req->headers);
        req->headers = NULL;
        if (req->error)
                zap_request(req, cachemiss);
        return;

incomplete:
        if (req->error)
                goto redirect_error;
        if (tp->urg_data && !(tp->urg_data & TCP_URG_READ))
                goto restart;

        add_tux_atom(req, parse_request);
        INC_STAT(parse_static_incomplete);
        tux_push_req(req);
}

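/*
 * process_requests(): run all requests currently on this thread's
 * work-pending list. Requests with no atoms left are either flushed
 * or, if they are user-space requests, handed back to the caller via
 * *user_req. Returns the number of requests processed.
 */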
int process_requests (threadinfo_t *ti, tux_req_t **user_req)
{
        struct list_head *head, *curr;
        int count = 0;
        tux_req_t *req;

        *user_req = NULL;

restart_loop:
        spin_lock_irq(&ti->work_lock);
        head = &ti->work_pending;
        curr = head->next;

        if (curr != head) {
                int i;

                req = list_entry(curr, tux_req_t, work);
                Dprintk("PROCESS req %p <%p>.\n",
                        req, __builtin_return_address(0));
                for (i = 0; i < req->atom_idx; i++)
                        Dprintk("... atom %d: %p\n", i, req->atoms[i]);

                if (req->ti != ti)
                        TUX_BUG();
                if (req->magic != TUX_MAGIC)
                        TUX_BUG();

                if (list_empty(&req->work))
                        TUX_BUG();
                list_del(curr);
                DEBUG_DEL_LIST(&req->work);
                spin_unlock_irq(&ti->work_lock);

                if (!req->atom_idx) {
                        if (req->usermode) {
                                *user_req = req;
                                return count;
                        }
                        /*
                         * idx == 0 requests are flushed automatically.
                         */
                        flush_request(req, 0);
                } else
                        tux_schedule_atom(req, 0);
                count++;
                goto restart_loop;
        }
        spin_unlock_irq(&ti->work_lock);

        return count;
}

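/*
 * tux_flush_workqueue(): forcibly drain this thread's work-pending
 * list, resetting and flushing every request on it. Returns the
 * number of requests flushed.
 */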
int tux_flush_workqueue (threadinfo_t *ti)
{
        struct list_head *head, *curr, *next;
        tux_req_t *req;
        int count = 0;

restart:
        spin_lock_irq(&ti->work_lock);
        head = &ti->work_pending;
        curr = head->next;

        if (curr != head) {
                req = list_entry(curr, tux_req_t, work);
                next = curr->next;
                clear_bit(0, &req->idle_input);
                clear_bit(0, &req->wait_output_space);
                if (list_empty(&req->work))
                        TUX_BUG();
                list_del(curr);
                DEBUG_DEL_LIST(curr);
                DEC_STAT(nr_input_pending);
                spin_unlock_irq(&ti->work_lock);
#ifdef CONFIG_TUX_DEBUG
                req->bytes_expected = 0;
#endif
                req->in_file->f_pos = 0;
                req->atom_idx = 0;
                clear_keepalive(req);
                req->status = -1;
                if (req->usermode) {
                        req->usermode = 0;
                        req->private = 0;
                }
                flush_request(req, 0);
                count++;
                goto restart;
        }
        spin_unlock_irq(&ti->work_lock);

        return count;
}

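/*
 * print_all_requests(): walk this thread's list of all requests and
 * print_req() each of them. Returns the number of requests printed.
 */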
int print_all_requests (threadinfo_t *ti)
{
        struct list_head *head, *curr;
        tux_req_t *req;
        int count = 0;

        spin_lock_irq(&ti->work_lock);
        head = &ti->all_requests;
        curr = head->next;

        while (curr != head) {
                req = list_entry(curr, tux_req_t, all);
                curr = curr->next;
                print_req(req);
                count++;
        }
        spin_unlock_irq(&ti->work_lock);

        return count;
}