/*
 * TUX - Integrated Application Protocols Layer and Object Cache
 *
 * Copyright (C) 2000, 2001, Ingo Molnar
 *
 * input.c: handle requests arriving on accepted connections
 */

#include <net/tux.h>

/****************************************************************
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; either version 2, or (at your option)
 *	any later version.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 ****************************************************************/

void zap_request (tux_req_t *req, int cachemiss)
{
	if (!req->error)
		TUX_BUG();
	if (req->error == TUX_ERROR_CONN_TIMEOUT) {
		if (req->proto->request_timeout) {
			clear_keepalive(req);
			req->proto->request_timeout(req, cachemiss);
		} else {
			clear_keepalive(req);
			if (!cachemiss)
				flush_request(req, 0);
			else {
				add_tux_atom(req, flush_request);
				add_req_to_workqueue(req);
			}
		}
		return;
	}

	if (!cachemiss && (req->error == TUX_ERROR_CONN_CLOSE)) {
		/*
		 * Zap connection as fast as possible, there is
		 * no valid client connection anymore:
		 */
		clear_keepalive(req);
		flush_request(req, 0);
	} else {
		if (req->error == TUX_ERROR_CONN_CLOSE) {
			clear_keepalive(req);
			add_tux_atom(req, flush_request);
		} else
			/*
			 * Potentially redirect to the secondary server:
			 */
			add_tux_atom(req, redirect_request);
		add_req_to_workqueue(req);
	}
}

void __switch_docroot (tux_req_t *req)
{
	if (!req->docroot_dentry || !req->docroot_mnt)
		TUX_BUG();
	set_fs_root(current->fs, req->docroot_mnt, req->docroot_dentry);
}

struct dentry * __tux_lookup (tux_req_t *req, const char *filename,
			 struct nameidata *base, struct vfsmount **mnt)
{
	int err;

	err = path_walk(filename, base);
	if (err) {
		Dprintk("path_walk() returned with %d!\n", err);
		return ERR_PTR(err);
	}
	if (*mnt)
		TUX_BUG();
	*mnt = base->mnt;

	return base->dentry;
}

int tux_permission (struct inode *inode)
{
	umode_t mode;
	int err;

	mode = inode->i_mode;
	Dprintk("URL inode mode: %08x.\n", mode);

	if (mode & tux_mode_forbidden)
		return -2;
	/*
	 * At least one bit in the 'allowed' set has to
	 * be present to allow access:
	 */
	if (!(mode & tux_mode_allowed))
		return -3;
	err = permission(inode, MAY_READ, NULL);
	return err;
}
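/*
 * Illustrative example of the tux_permission() policy above (the
 * mask values here are hypothetical; the real ones come from the
 * tux_mode_forbidden and tux_mode_allowed sysctls): with
 * tux_mode_forbidden == 0111 and tux_mode_allowed == 0004, a file
 * with mode 0644 is served (world-readable, no execute bits),
 * mode 0755 is rejected with -2 (an execute bit is set), and
 * mode 0640 is rejected with -3 (the world-readable bit is
 * missing). Files passing both mask checks still go through the
 * generic VFS permission(MAY_READ) check.
 */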
struct dentry * tux_lookup (tux_req_t *req, const char *filename,
			const unsigned int flag, struct vfsmount **mnt)
{
	struct dentry *dentry;
	struct nameidata base = { };

	Dprintk("tux_lookup(%p, %s, %d, virtual: %d, host: %s (%d).)\n",
		req, filename, flag, req->virtual, req->host, req->host_len);

	base.flags = LOOKUP_FOLLOW|flag;
	base.last_type = LAST_ROOT;

	if (req->objectname[0] == '/') {
		base.dentry = dget(req->docroot_dentry);
		base.mnt = mntget(req->docroot_mnt);
	} else {
		if (!req->cwd_dentry) {
			req->cwd_dentry = dget(req->docroot_dentry);
			req->cwd_mnt = mntget(req->docroot_mnt);
		}
		base.dentry = req->cwd_dentry;
		dget(base.dentry);
		base.mnt = mntget(req->cwd_mnt);
	}

	switch_docroot(req);
	dentry = __tux_lookup(req, filename, &base, mnt);

	Dprintk("looked up {%s} == dentry %p.\n", filename, dentry);

	if (dentry && !IS_ERR(dentry) && !dentry->d_inode)
		TUX_BUG();

	return dentry;
}

int lookup_object (tux_req_t *req, const unsigned int flag)
{
	struct vfsmount *mnt = NULL;
	struct dentry *dentry = NULL;
	int perm;

	dentry = tux_lookup(req, req->objectname, flag, &mnt);
	if (!dentry || IS_ERR(dentry)) {
		if (PTR_ERR(dentry) == -EWOULDBLOCKIO)
			goto cachemiss;
		goto abort;
	}
	perm = tux_permission(dentry->d_inode);
	/*
	 * Only regular files allowed.
	 */
	if ((perm < 0) || !S_ISREG(dentry->d_inode->i_mode)) {
		req->status = 403;
		goto abort;
	}
	req->total_file_len = dentry->d_inode->i_size;
out:
	install_req_dentry(req, dentry, mnt);
	return 0;
cachemiss:
	return 1;
abort:
	if (dentry) {
		if (!IS_ERR(dentry))
			dput(dentry);
		dentry = NULL;
	}
	if (mnt) {
		if (!IS_ERR(mnt))
			mntput(mnt);
		mnt = NULL;
	}
	req_err(req);
	goto out;
}

void install_req_dentry (tux_req_t *req, struct dentry *dentry,
						struct vfsmount *mnt)
{
	if (req->dentry)
		TUX_BUG();
	req->dentry = dentry;
	if (req->mnt)
		TUX_BUG();
	req->mnt = mnt;
	if (req->in_file && req->in_file->f_dentry)
		TUX_BUG();
	if (dentry)
		req->in_file = dentry_open(dget(dentry), NULL, O_RDONLY);
}

void release_req_dentry (tux_req_t *req)
{
	if (!req->dentry) {
		if (req->in_file && req->in_file->f_dentry)
			TUX_BUG();
		return;
	}

	fput(req->in_file);
	req->in_file = NULL;
	dput(req->dentry);
	req->dentry = NULL;
	mntput(req->mnt);
	req->mnt = NULL;
}

int __connection_too_fast (tux_req_t *req)
{
	unsigned long curr_bw, delta, bytes;

	bytes = req->total_bytes + req->bytes_sent;
	if (!bytes)
		return 1;

	delta = jiffies - req->first_timestamp;
	if (!delta)
		delta++;
	curr_bw = bytes * HZ / delta;

	if (curr_bw > tux_max_output_bandwidth)
		return 2;
	return 0;
}
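/*
 * Worked example for the bandwidth estimate above (numbers are
 * illustrative only): with HZ == 1000, 65536 bytes sent over 250
 * jiffies gives curr_bw = 65536 * 1000 / 250 == 262144 bytes/sec;
 * if tux_max_output_bandwidth is configured below that, the
 * connection is reported as too fast (return value 2) and gets
 * throttled by the caller. A connection with no bytes accounted
 * yet returns 1, so it too is treated as too fast until it has
 * measurable history.
 */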
void unidle_req (tux_req_t *req)
{
	threadinfo_t *ti = req->ti;

	Dprintk("UNIDLE req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n",
		req, __builtin_return_address(0), req->sock,
		req->sock->sk, req->keep_alive, req->status);
	spin_lock_irq(&ti->work_lock);
	if (req->magic != TUX_MAGIC)
		TUX_BUG();
	if (!test_and_clear_bit(0, &req->idle_input)) {
		Dprintk("unidling %p, wasn't idle!\n", req);
		if (list_empty(&req->work))
			TUX_BUG();
		list_del(&req->work);
		DEBUG_DEL_LIST(&req->work);
		DEC_STAT(nr_work_pending);
	} else {
		del_keepalive_timer(req);
		DEC_STAT(nr_idle_input_pending);
		Dprintk("unidled %p.\n", req);
	}
	if (req->idle_input)
		TUX_BUG();
	spin_unlock_irq(&ti->work_lock);
}

#define GOTO_INCOMPLETE do { Dprintk("incomplete at %s:%d.\n", __FILE__, __LINE__); goto incomplete; } while (0)
#define GOTO_REDIRECT do { TDprintk("redirect at %s:%d.\n", __FILE__, __LINE__); goto redirect; } while (0)
#define GOTO_REDIRECT_NONIDLE do { TDprintk("redirect at %s:%d.\n", __FILE__, __LINE__); goto redirect_nonidle; } while (0)

static int read_request (struct socket *sock, char *buf, int max_size)
{
	mm_segment_t oldmm;
	struct kiocb iocb;
	struct msghdr msg;
	struct iovec iov;
	int len;

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	msg.msg_iov->iov_base = buf;
	msg.msg_iov->iov_len = max_size;

	oldmm = get_fs(); set_fs(KERNEL_DS);

read_again:
	init_sync_kiocb(&iocb, NULL);
	len = sock->sk->sk_prot->recvmsg(&iocb, sock->sk, &msg, max_size,
						MSG_DONTWAIT, MSG_PEEK, NULL);
	if (len == -EIOCBQUEUED)
		len = wait_on_sync_kiocb(&iocb);

	/*
	 * We must not get a signal in between:
	 */
	if ((len == -EAGAIN) || (len == -ERESTARTSYS)) {
		if (!signal_pending(current)) {
			len = 0;
			goto out;
		}
		flush_all_signals();
		goto read_again;
	}
out:
	set_fs(oldmm);
	return len;
}

/*
 * We inline URG data so it's at the head of the normal receive queue.
 */
static int zap_urg_data (struct socket *sock)
{
	mm_segment_t oldmm;
	struct msghdr msg;
	struct iovec iov;
	struct kiocb iocb;
	int len;
	char buf[10];

	oldmm = get_fs(); set_fs(KERNEL_DS);

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags = 0;

	msg.msg_iov->iov_base = buf;
	msg.msg_iov->iov_len = 2;

read_again:
	init_sync_kiocb(&iocb, NULL);
	len = sock->sk->sk_prot->recvmsg(&iocb, sock->sk, &msg, 2,
						MSG_DONTWAIT, 0, NULL);
	if (len == -EIOCBQUEUED)
		len = wait_on_sync_kiocb(&iocb);
	Dprintk("recvmsg(MSG_OOB) returned %d.\n", len);

	/*
	 * We must not get a signal in between:
	 */
	if ((len == -EAGAIN) || (len == -ERESTARTSYS)) {
		if (!signal_pending(current)) {
			len = 0;
			goto out;
		}
		flush_all_signals();
		goto read_again;
	}
out:
	set_fs(oldmm);

	Dprintk("in out:.. and will return %d.!\n", len);
	return len;
}

void trunc_headers (tux_req_t *req)
{
	struct sock *sk = req->sock->sk;
	int len, addr_len = 0;
	struct kiocb iocb;

	if (!req->parsed_len)
		TUX_BUG();
repeat_trunc:
	init_sync_kiocb(&iocb, NULL);
	len = sk->sk_prot->recvmsg(&iocb, sk, NULL, req->parsed_len,
					1, MSG_TRUNC, &addr_len);
	if (len == -EIOCBQUEUED)
		len = wait_on_sync_kiocb(&iocb);
	if ((len == -ERESTARTSYS) || (len == -EAGAIN)) {
		flush_all_signals();
		goto repeat_trunc;
	}
	Dprintk("truncated (TRUNC) %d bytes at %p. (wanted: %d.)\n",
		len, __builtin_return_address(0), req->parsed_len);
	req->parsed_len = 0;
}
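/*
 * The three input helpers above share one idiom: a nonblocking
 * recvmsg() issued through a synchronous kiocb, where a pending
 * signal must not abort header input, so -EAGAIN/-ERESTARTSYS
 * with a signal pending flushes the signals and retries. A
 * minimal sketch of the pattern (error handling elided):
 *
 *	init_sync_kiocb(&iocb, NULL);
 *	len = sk->sk_prot->recvmsg(&iocb, sk, &msg, size,
 *					MSG_DONTWAIT, 0, NULL);
 *	if (len == -EIOCBQUEUED)
 *		len = wait_on_sync_kiocb(&iocb);
 *	if ((len == -EAGAIN) || (len == -ERESTARTSYS)) {
 *		flush_all_signals();
 *		... retry from the top ...
 *	}
 */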
void print_req (tux_req_t *req)
{
	struct sock *sk;

	printk("PRINT req %p <%p>, sock %p\n",
		req, __builtin_return_address(0), req->sock);
	printk("... idx: %d\n", req->atom_idx);
	if (req->sock) {
		sk = req->sock->sk;
		printk("... sock %p, sk %p, sk->state: %d, sk->err: %d\n",
			req->sock, sk, sk->sk_state, sk->sk_err);
		printk("... write_queue: %d, receive_queue: %d, error_queue: %d, keepalive: %d, status: %d\n",
			!skb_queue_empty(&sk->sk_write_queue),
			!skb_queue_empty(&sk->sk_receive_queue),
			!skb_queue_empty(&sk->sk_error_queue),
			req->keep_alive, req->status);
		printk("...tp->send_head: %p\n", sk->sk_send_head);
		printk("...tp->snd_una: %08x\n", tcp_sk(sk)->snd_una);
		printk("...tp->snd_nxt: %08x\n", tcp_sk(sk)->snd_nxt);
		printk("...tp->packets_out: %08x\n", tcp_sk(sk)->packets_out);
	}
	printk("... meth:{%s}, uri:{%s}, query:{%s}, ver:{%s}\n",
		req->method_str ? req->method_str : "",
		req->uri_str ? req->uri_str : "",
		req->query_str ? req->query_str : "",
		req->version_str ? req->version_str : "");
	printk("... post_data:{%s}(%d).\n", req->post_data_str, req->post_data_len);
	printk("... headers: {%s}\n", req->headers);
}

/*
 * parse_request() reads all available TCP/IP data and prepares
 * the request if the TUX request is complete. (We can get TUX
 * requests in several packets.) Invalid requests are redirected
 * to the secondary server.
 */
void parse_request (tux_req_t *req, int cachemiss)
{
	int len, parsed_len;
	struct sock *sk = req->sock->sk;
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int was_keepalive = req->keep_alive;

	if (req->magic != TUX_MAGIC)
		TUX_BUG();

	SET_TIMESTAMP(req->parse_timestamp);

	spin_lock_irq(&req->ti->work_lock);
	add_keepalive_timer(req);
	if (test_and_set_bit(0, &req->idle_input))
		TUX_BUG();
	INC_STAT(nr_idle_input_pending);
	spin_unlock_irq(&req->ti->work_lock);

	Dprintk("idled request %p.\n", req);

restart:

	if (tp->urg_data && !(tp->urg_data & TCP_URG_READ)) {
		len = zap_urg_data(req->sock);
		if (tp->urg_data && !(tp->urg_data & TCP_URG_READ)) {
			req->error = TUX_ERROR_CONN_CLOSE;
			goto redirect_error;
		}
	}

	INC_STAT(input_slowpath);

	if (!req->headers)
		req->headers = tux_kmalloc(tux_max_header_len);

	/* First, read the data: */
	len = read_request(req->sock, (char *)req->headers, tux_max_header_len-1);
	if (len < 0) {
		req->error = TUX_ERROR_CONN_CLOSE;
		goto redirect_error;
	}
	if (!len)
		GOTO_INCOMPLETE;

	/*
	 * Make it a NUL-terminated string to automatically get
	 * protection against various buffer overflow situations.
	 * Then pass it to the TUX application protocol stack:
	 */
	((char *)req->headers)[len] = 0;
	req->headers_len = len;

	parsed_len = req->proto->parse_message(req, len);

	/*
	 * Is the request fully read? (Or is there an error?)
	 */
	if (parsed_len < 0)
		GOTO_REDIRECT;
	if (!parsed_len) {
		/*
		 * Push a pending ACK which was delayed due to the
		 * pingpong optimization:
		 */
		if (was_keepalive) {
			lock_sock(sk);
			icsk->icsk_ack.pingpong = 0;
			icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
			cleanup_rbuf(sk, 1);
			release_sock(sk);
		}
		if (len >= tux_max_header_len-1)
			GOTO_REDIRECT;
		GOTO_INCOMPLETE;
	}
	unidle_req(req);

	tp->nonagle = 2;

	add_req_to_workqueue(req);
	return;

redirect:
	TDprintk("req %p will be redirected!\n", req);
	req_err(req);

redirect_error:
	unidle_req(req);

	if (len < 0)
		req->parsed_len = 0;
	else
		req->parsed_len = len;

	INC_STAT(parse_static_redirect);

	if (req->headers)
		kfree(req->headers);
	req->headers = NULL;
	if (req->error)
		zap_request(req, cachemiss);
	return;

incomplete:
	if (req->error)
		goto redirect_error;

	if (tp->urg_data && !(tp->urg_data & TCP_URG_READ))
		goto restart;

	add_tux_atom(req, parse_request);
	INC_STAT(parse_static_incomplete);
	tux_push_req(req);
}
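/*
 * Note on the parse_message() return contract assumed by
 * parse_request() above: a negative value marks the request
 * invalid (it gets redirected to the secondary server), zero
 * means the headers are not yet complete (the parse atom is
 * re-queued and input continues), and a positive value is the
 * number of header bytes consumed, which ends up in
 * req->parsed_len and is later drained off the socket by
 * trunc_headers() via MSG_TRUNC.
 */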
int process_requests (threadinfo_t *ti, tux_req_t **user_req)
{
	struct list_head *head, *curr;
	int count = 0;
	tux_req_t *req;

	*user_req = NULL;

restart_loop:
	spin_lock_irq(&ti->work_lock);
	head = &ti->work_pending;
	curr = head->next;

	if (curr != head) {
		int i;

		req = list_entry(curr, tux_req_t, work);
		Dprintk("PROCESS req %p <%p>.\n",
			req, __builtin_return_address(0));
		for (i = 0; i < req->atom_idx; i++)
			Dprintk("... atom %d: %p\n", i, req->atoms[i]);

		if (req->ti != ti)
			TUX_BUG();
		if (req->magic != TUX_MAGIC)
			TUX_BUG();

		if (list_empty(&req->work))
			TUX_BUG();
		list_del(curr);
		DEBUG_DEL_LIST(&req->work);
		spin_unlock_irq(&ti->work_lock);

		if (!req->atom_idx) {
			if (req->usermode) {
				*user_req = req;
				return count;
			}
			/*
			 * idx == 0 requests are flushed automatically:
			 */
			flush_request(req, 0);
		} else
			tux_schedule_atom(req, 0);
		count++;
		goto restart_loop;
	}
	spin_unlock_irq(&ti->work_lock);

	return count;
}

int tux_flush_workqueue (threadinfo_t *ti)
{
	struct list_head *head, *curr, *next;
	tux_req_t *req;
	int count = 0;

restart:
	spin_lock_irq(&ti->work_lock);
	head = &ti->work_pending;
	curr = head->next;

	if (curr != head) {
		req = list_entry(curr, tux_req_t, work);
		next = curr->next;
		clear_bit(0, &req->idle_input);
		clear_bit(0, &req->wait_output_space);
		if (list_empty(&req->work))
			TUX_BUG();
		list_del(curr);
		DEBUG_DEL_LIST(curr);
		DEC_STAT(nr_input_pending);
		spin_unlock_irq(&ti->work_lock);
#ifdef CONFIG_TUX_DEBUG
		req->bytes_expected = 0;
#endif
		req->in_file->f_pos = 0;
		req->atom_idx = 0;
		clear_keepalive(req);
		req->status = -1;
		if (req->usermode) {
			req->usermode = 0;
			req->private = 0;
		}
		flush_request(req, 0);
		count++;
		goto restart;
	}
	spin_unlock_irq(&ti->work_lock);

	return count;
}

int print_all_requests (threadinfo_t *ti)
{
	struct list_head *head, *curr;
	tux_req_t *req;
	int count = 0;

	spin_lock_irq(&ti->work_lock);
	head = &ti->all_requests;
	curr = head->next;
	while (curr != head) {
		req = list_entry(curr, tux_req_t, all);
		curr = curr->next;
		print_req(req);
		count++;
	}
	spin_unlock_irq(&ti->work_lock);

	return count;
}
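/*
 * Sketch of how a TUX worker thread is expected to drive
 * process_requests() (the actual event loop lives elsewhere in
 * TUX; handle_userspace_req() is a hypothetical name used only
 * for illustration):
 *
 *	tux_req_t *user_req;
 *	int count;
 *
 *	for (;;) {
 *		count = process_requests(ti, &user_req);
 *		if (user_req)
 *			handle_userspace_req(user_req);
 *		if (!count)
 *			break;
 *	}
 *
 * process_requests() drains ti->work_pending one request at a
 * time, scheduling pending atoms, and hands back at most one
 * zero-atom usermode request per call.
 */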