/*
 * TUX - Integrated Application Protocols Layer and Object Cache
 *
 * Copyright (C) 2000, 2001, Ingo Molnar <mingo@redhat.com>
 *
 * accept.c: accept new connections, allocate requests
 */

/****************************************************************
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 ****************************************************************/
/*
 * Runtime tunables (presumably exported via sysctl elsewhere in TUX —
 * TODO confirm against the rest of the module):
 *   tux_ack_pingpong    - value copied into tp->ack.pingpong on listen/accept
 *   tux_push_all        - not referenced in this extract
 *   tux_zerocopy_parse  - not referenced in this extract
 */
28 unsigned int tux_ack_pingpong = 1;
29 unsigned int tux_push_all = 0;
30 unsigned int tux_zerocopy_parse = 1;
/*
 * Forward declarations: the "__"-prefixed variants are the lock-held
 * versions (callers hold ti->work_lock); they are needed early because
 * __add_keepalive_timer() uses them to age out LRU keepalive connections.
 */
32 static int __idle_event (tux_req_t *req);
33 static int __output_space_event (tux_req_t *req);
/*
 * start_listening - create, configure, bind and listen on a TUX socket.
 * @listen: protocol/address/port descriptor for this listener
 * @nr:     TUX thread number (used only for the notice printk)
 *
 * Returns the listening socket (return statement and error-path gotos are
 * not visible in this extract — TODO confirm against the full file).
 * NOTE(review): several lines (error checks, braces) are missing from
 * this extract; comments below describe only the visible code.
 */
35 struct socket * start_listening(tux_socket_t *listen, int nr)
37 struct sockaddr_in sin;
38 struct socket *sock = NULL;
42 u16 port = listen->port;
43 u32 addr = listen->ip;
44 tux_proto_t *proto = listen->proto;
46 /* Create a listening socket: */
48 err = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
50 printk(KERN_ERR "TUX: error %d creating socket.\n", err);
54 /* Bind the socket: */
56 sin.sin_family = AF_INET;
57 sin.sin_addr.s_addr = htonl(addr);
58 sin.sin_port = htons(port);
/* Deliver TCP urgent data inline rather than as out-of-band data: */
62 sock_set_flag(sk, SOCK_URGINLINE);
64 err = sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
66 printk(KERN_ERR "TUX: error %d binding socket. This means that probably some other process is (or was a short time ago) using addr %s://%d.%d.%d.%d:%d.\n",
67 err, proto->name, HIPQUAD(addr), port);
72 Dprintk("listen sk accept_queue: %p/%p.\n",
73 tp->accept_queue, tp->accept_queue_tail);
/* Apply the global delayed-ACK ("pingpong") policy to the listener: */
74 tp->ack.pingpong = tux_ack_pingpong;
/* No lingering close — drop straight to the configured FIN timeout: */
76 sock_reset_flag(sk, SOCK_LINGER);
77 sk->sk_lingertime = 0;
78 tp->linger2 = tux_keepalive_timeout * HZ;
/* defer-accept only makes sense without keepalive — TODO confirm rationale */
80 if (proto->defer_accept && !tux_keepalive_timeout && tux_defer_accept)
83 /* Now, start listening on the socket */
85 err = sock->ops->listen(sock, tux_max_backlog);
87 printk(KERN_ERR "TUX: error %d listening on socket.\n", err);
91 printk(KERN_NOTICE "TUX: thread %d listens on %s://%d.%d.%d.%d:%d.\n",
92 nr, proto->name, HIPQUAD(addr), port);
/*
 * __kfree_req - final release of a request object: unlink it from the
 * thread's all_requests list and free it. Caller holds
 * ti->free_requests_lock (all callers in this file do).
 * NOTE(review): the list_del/kfree lines are not visible in this extract.
 */
101 static inline void __kfree_req (tux_req_t *req, threadinfo_t * ti)
104 DEBUG_DEL_LIST(&req->all);
/*
 * flush_freequeue - drain this thread's cache of free request objects,
 * fully freeing each one. Returns a count (presumably of requests freed;
 * the return statement is not visible in this extract — TODO confirm).
 */
109 int flush_freequeue (threadinfo_t * ti)
111 struct list_head *tmp;
116 spin_lock_irqsave(&ti->free_requests_lock,flags);
117 while (ti->nr_free_requests) {
118 ti->nr_free_requests--;
119 tmp = ti->free_requests.next;
120 req = list_entry(tmp, tux_req_t, free);
/* (list_del of &req->free presumably here — line missing from extract) */
123 DEC_STAT(nr_free_pending);
124 __kfree_req(req, ti);
127 spin_unlock_irqrestore(&ti->free_requests_lock,flags);
/*
 * kmalloc_req - allocate a request object for thread @ti.
 *
 * Fast path: pop one off the per-thread LIFO free-request cache.
 * Slow path: kmalloc a new one, zero it, link it into ti->all_requests
 * and initialize its wait-queue entries, list heads and timers.
 * Returns the request (return statements not visible in this extract).
 */
132 static tux_req_t * kmalloc_req (threadinfo_t * ti)
134 struct list_head *tmp;
138 spin_lock_irqsave(&ti->free_requests_lock, flags);
139 if (ti->nr_free_requests) {
/* Cache hit: reuse the most recently freed request (LIFO). */
140 ti->nr_free_requests--;
141 tmp = ti->free_requests.next;
142 req = list_entry(tmp, tux_req_t, free);
145 DEC_STAT(nr_free_pending);
146 req->magic = TUX_MAGIC;
147 spin_unlock_irqrestore(&ti->free_requests_lock, flags);
/* Cache miss: drop the lock and allocate a fresh request. */
149 spin_unlock_irqrestore(&ti->free_requests_lock, flags);
150 req = tux_kmalloc(sizeof(*req));
152 memset (req, 0, sizeof(*req));
153 list_add(&req->all, &ti->all_requests);
155 req->magic = TUX_MAGIC;
156 INC_STAT(nr_allocated);
/* Pre-initialize the wait-queue entries with the current (TUX) thread: */
157 init_waitqueue_entry(&req->sleep, current);
158 init_waitqueue_entry(&req->ftp_sleep, current);
159 INIT_LIST_HEAD(&req->work);
160 INIT_LIST_HEAD(&req->free);
161 INIT_LIST_HEAD(&req->lru);
163 req->total_bytes = 0;
164 SET_TIMESTAMP(req->accept_timestamp);
165 req->first_timestamp = jiffies;
167 init_timer(&req->keepalive_timer);
168 init_timer(&req->output_timer);
170 Dprintk("allocated NEW req %p.\n", req);
/*
 * kfree_req - release a request object.
 *
 * Sanity-checks the magic and that the request holds no resources
 * (socket/dentry/private), then either frees it outright (cache above
 * tux_max_free_requests) or pushes it onto the per-thread LIFO free
 * cache for quick reuse by kmalloc_req().
 * NOTE(review): the TUX_BUG()/goto lines for the checks are missing
 * from this extract.
 */
174 void kfree_req (tux_req_t *req)
176 threadinfo_t * ti = req->ti;
179 Dprintk("freeing req %p.\n", req);
181 if (req->magic != TUX_MAGIC)
183 spin_lock_irqsave(&ti->free_requests_lock,flags);
185 DEC_STAT(nr_allocated);
/* A request being freed must not still own a socket, dentry or
   protocol-private data: */
186 if (req->sock || req->dentry || req->private)
188 if (ti->nr_free_requests > tux_max_free_requests)
189 __kfree_req(req, ti);
192 ti->nr_free_requests++;
194 // the free requests queue is LIFO
195 list_add(&req->free, &ti->free_requests);
196 INC_STAT(nr_free_pending);
198 spin_unlock_irqrestore(&ti->free_requests_lock,flags);
/*
 * __add_req_to_workqueue - queue @req for processing by its TUX thread
 * and wake that thread. Caller holds ti->work_lock.
 *
 * Throttled ("too fast") connections go to the tail so they do not
 * starve other work; everything else goes to the head.
 * NOTE(review): the early-return for an already-queued request and the
 * else branch are implied by the visible lines but not shown in this
 * extract — confirm against the full file.
 */
201 static void __add_req_to_workqueue (tux_req_t *req)
203 threadinfo_t *ti = req->ti;
205 if (!list_empty(&req->work))
207 Dprintk("work-queueing request %p at %p/%p.\n", req, __builtin_return_address(0), __builtin_return_address(1));
208 if (connection_too_fast(req))
209 list_add_tail(&req->work, &ti->work_pending);
211 list_add(&req->work, &ti->work_pending);
212 INC_STAT(nr_work_pending);
213 wake_up_process(ti->thread);
/*
 * add_req_to_workqueue - locking wrapper: take ti->work_lock and queue
 * @req via __add_req_to_workqueue().
 */
217 void add_req_to_workqueue (tux_req_t *req)
220 threadinfo_t *ti = req->ti;
222 spin_lock_irqsave(&ti->work_lock, flags);
223 __add_req_to_workqueue(req);
224 spin_unlock_irqrestore(&ti->work_lock, flags);
/*
 * del_output_timer - remove @req from the LRU list (if present) and
 * cancel its output-throttle timer. Expects ti->work_lock to be held;
 * the spin_is_locked() test is a debug assertion (its TUX_BUG body is
 * not visible in this extract).
 */
227 void del_output_timer (tux_req_t *req)
230 if (!spin_is_locked(&req->ti->work_lock))
233 if (!list_empty(&req->lru)) {
235 DEBUG_DEL_LIST(&req->lru);
238 Dprintk("del output timeout for req %p.\n", req);
239 del_timer(&req->output_timer);
/* Output-throttle timer: re-check a rate-limited connection every second. */
242 static void output_timeout_fn (unsigned long data);
244 #define OUTPUT_TIMEOUT HZ
/*
 * add_output_timer - (re)arm @req's output-throttle timer to fire in
 * OUTPUT_TIMEOUT jiffies, with the request as the callback argument.
 */
246 static void add_output_timer (tux_req_t *req)
248 struct timer_list *timer = &req->output_timer;
250 timer->data = (unsigned long) req;
251 timer->function = &output_timeout_fn;
252 mod_timer(timer, jiffies + OUTPUT_TIMEOUT);
/*
 * output_timeout_fn - timer callback for the output throttle.
 * If the connection is still exceeding its rate limit, re-arm the timer
 * and wait another period; otherwise report output space so the request
 * is put back on the work queue.
 */
255 static void output_timeout_fn (unsigned long data)
257 tux_req_t *req = (tux_req_t *)data;
259 if (connection_too_fast(req)) {
260 add_output_timer(req);
261 // mod_timer(&req->output_timer, jiffies + OUTPUT_TIMEOUT);
264 output_space_event(req);
/*
 * output_timeout - put a rate-limited request to sleep: mark it as
 * waiting for output space and arm the throttle timer. The
 * test_and_set_bit() guards against double-arming (its early-return /
 * TUX_BUG body is not visible in this extract).
 */
267 void output_timeout (tux_req_t *req)
269 Dprintk("output timeout for req %p.\n", req);
270 if (test_and_set_bit(0, &req->wait_output_space))
272 INC_STAT(nr_output_space_pending);
273 add_output_timer(req);
/*
 * __del_keepalive_timer - remove @req from the keepalive LRU list (if
 * present) and cancel its keepalive timer. Expects ti->work_lock held;
 * the spin_is_locked() test is a debug assertion (body not visible in
 * this extract).
 */
276 void __del_keepalive_timer (tux_req_t *req)
279 if (!spin_is_locked(&req->ti->work_lock))
282 if (!list_empty(&req->lru)) {
284 DEBUG_DEL_LIST(&req->lru);
287 Dprintk("del keepalive timeout for req %p.\n", req);
288 del_timer(&req->keepalive_timer);
/*
 * keepalive_timeout_fn - timer callback: a keepalive connection has
 * been idle for tux_keepalive_timeout seconds. Mark it with
 * TUX_ERROR_CONN_TIMEOUT and kick it back onto the work queue via
 * idle_event() (or output_space_event() if it was not idle-waiting)
 * so the worker thread can close it.
 */
291 static void keepalive_timeout_fn (unsigned long data)
293 tux_req_t *req = (tux_req_t *)data;
296 Dprintk("req %p timed out after %d sec!\n", req, tux_keepalive_timeout);
300 Dprintk("req->error = TUX_ERROR_CONN_TIMEOUT!\n");
301 req->error = TUX_ERROR_CONN_TIMEOUT;
302 if (!idle_event(req))
303 output_space_event(req);
/*
 * __add_keepalive_timer - start the keepalive timeout for @req and put
 * it on the per-thread keepalive LRU list. Expects ti->work_lock held
 * (debug assertion; its body is not visible in this extract).
 *
 * No-op when keepalive is disabled (tux_keepalive_timeout == 0). If
 * there are already more than tux_max_keepalives connections on the
 * LRU, the oldest one is aged out immediately: it is flagged with
 * TUX_ERROR_CONN_TIMEOUT and re-queued via the lock-held
 * __idle_event()/__output_space_event() variants.
 * NOTE(review): the final add_timer() call is presumably just past the
 * visible lines — confirm against the full file.
 */
306 void __add_keepalive_timer (tux_req_t *req)
308 struct timer_list *timer = &req->keepalive_timer;
310 if (!tux_keepalive_timeout)
313 if (!spin_is_locked(&req->ti->work_lock))
317 if (!list_empty(&req->lru))
319 if (req->ti->nr_lru > tux_max_keepalives) {
320 struct list_head *head, *last;
/* Victim is the last (least recently used) entry on ti->lru: */
323 head = &req->ti->lru;
327 last_req = list_entry(last, tux_req_t, lru);
329 DEBUG_DEL_LIST(last);
332 Dprintk("LRU-aging req %p!\n", last_req);
333 last_req->error = TUX_ERROR_CONN_TIMEOUT;
334 if (!__idle_event(last_req))
335 __output_space_event(last_req);
/* Newest keepalive connection goes to the front of the LRU: */
337 list_add(&req->lru, &req->ti->lru);
340 timer->expires = jiffies + tux_keepalive_timeout * HZ;
341 timer->data = (unsigned long) req;
342 timer->function = &keepalive_timeout_fn;
/*
 * __output_space_event - the socket has room to send again (or the
 * request is being force-woken). Clears the wait_output_space flag,
 * cancels both timers and puts @req back on the work queue.
 * Caller holds ti->work_lock. Returns nonzero when the request was
 * actually woken, 0 otherwise (return statements are not visible in
 * this extract — TODO confirm).
 */
346 static int __output_space_event (tux_req_t *req)
348 if (!req || (req->magic != TUX_MAGIC))
/* Not waiting for output space — nothing to do: */
351 if (!test_and_clear_bit(0, &req->wait_output_space)) {
352 Dprintk("output space ready event at <%p>, on non-idle %p.\n", __builtin_return_address(0), req);
356 Dprintk("output space ready event at <%p>, %p was waiting!\n", __builtin_return_address(0), req);
357 DEC_STAT(nr_output_space_pending);
359 del_keepalive_timer(req);
360 del_output_timer(req);
362 __add_req_to_workqueue(req);
/*
 * output_space_event - locking wrapper around __output_space_event();
 * takes ti->work_lock and returns its result.
 */
366 int output_space_event (tux_req_t *req)
371 spin_lock_irqsave(&req->ti->work_lock, flags);
372 ret = __output_space_event(req);
373 spin_unlock_irqrestore(&req->ti->work_lock, flags);
/*
 * __idle_event - input has arrived (or the request is being force-
 * woken) on an idle keepalive connection. Clears the idle_input flag,
 * cancels both timers, restores the delayed-ACK policy, refreshes the
 * accept timestamp and re-queues the request. Caller holds
 * ti->work_lock. Returns nonzero when the request was woken (return
 * statements not visible in this extract — TODO confirm).
 */
378 static int __idle_event (tux_req_t *req)
383 if (!req || (req->magic != TUX_MAGIC))
/* Not idle-waiting — nothing to do: */
387 if (!test_and_clear_bit(0, &req->idle_input)) {
388 Dprintk("data ready event at <%p>, on non-idle %p.\n", __builtin_return_address(0), req);
392 Dprintk("data ready event at <%p>, %p was idle!\n", __builtin_return_address(0), req);
393 del_keepalive_timer(req);
394 del_output_timer(req);
395 DEC_STAT(nr_idle_input_pending);
397 tp = tcp_sk(req->sock->sk);
399 tp->ack.pingpong = tux_ack_pingpong;
400 SET_TIMESTAMP(req->accept_timestamp);
402 __add_req_to_workqueue(req);
/*
 * idle_event - locking wrapper around __idle_event(); takes
 * ti->work_lock and returns its result.
 */
407 int idle_event (tux_req_t *req)
412 spin_lock_irqsave(&req->ti->work_lock, flags);
413 ret = __idle_event(req);
414 spin_unlock_irqrestore(&req->ti->work_lock, flags);
/*
 * Socket-callback trampoline helpers. TUX replaces a socket's
 * sk_data_ready/sk_write_space/etc. callbacks with its own; these
 * macros form the common preamble of every replacement:
 *
 * HANDLE_CALLBACK_1: under sk->sk_callback_lock (read), fetch the
 *   tux_req_t from sk->sk_user_data. If there is no request attached
 *   (e.g. during teardown), complain if our hook is still installed,
 *   then drop the lock and chain to whatever callback the socket has.
 * HANDLE_CALLBACK_2: sanity-check the request magic and chain to the
 *   ORIGINAL callback saved in req->real_name.
 * HANDLE_CALLBACK: both, in sequence. Each user function must still
 *   call read_unlock(&sk->sk_callback_lock) itself at the end.
 *
 * NOTE(review): continuation lines are missing from this extract; do
 * not edit the macro bodies without the full file.
 */
419 #define HANDLE_CALLBACK_1(callback, tux_name, real_name, param...) \
422 read_lock(&sk->sk_callback_lock); \
423 req = sk->sk_user_data; \
425 Dprintk("callback "#callback"(%p) req %p.\n", \
426 sk->sk_##callback, req); \
429 if (sk->sk_##callback == tux_name) { \
430 printk("BUG: "#callback" "#tux_name" "#real_name" no req!"); \
433 read_unlock(&sk->sk_callback_lock); \
434 if (sk->sk_##callback) \
435 sk->sk_##callback(param); \
439 #define HANDLE_CALLBACK_2(callback, tux_name, real_name, param...) \
440 Dprintk(#tux_name"() on %p.\n", req); \
441 if (req->magic != TUX_MAGIC) \
443 if (req->real_name) \
444 req->real_name(param);
446 #define HANDLE_CALLBACK(callback, tux_name, real_name, param...) \
447 HANDLE_CALLBACK_1(callback,tux_name,real_name,param) \
448 HANDLE_CALLBACK_2(callback,tux_name,real_name,param)
/*
 * tux_data_ready - replacement sk_data_ready for TUX-owned sockets:
 * wake the owning request (idle first, else output-space path), then
 * release the callback lock taken by HANDLE_CALLBACK_1.
 */
450 static void tux_data_ready (struct sock *sk, int len)
452 HANDLE_CALLBACK_1(data_ready, tux_data_ready, real_data_ready, sk, len);
454 if (!idle_event(req))
455 output_space_event(req);
456 read_unlock(&sk->sk_callback_lock);
/*
 * tux_write_space - replacement sk_write_space: when enough send
 * buffer has drained (stream wspace >= min wspace), clear SOCK_NOSPACE
 * and wake the owning request.
 */
459 static void tux_write_space (struct sock *sk)
461 HANDLE_CALLBACK(write_space, tux_write_space, real_write_space, sk);
463 Dprintk("sk->sk_wmem_queued: %d, sk->sk_sndbuf: %d.\n",
464 sk->sk_wmem_queued, sk->sk_sndbuf);
466 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
467 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
468 if (!idle_event(req))
469 output_space_event(req);
471 read_unlock(&sk->sk_callback_lock);
/*
 * tux_error_report - replacement sk_error_report: flag the request
 * for closing (TUX_ERROR_CONN_CLOSE) and wake it so the worker thread
 * tears the connection down.
 */
474 static void tux_error_report (struct sock *sk)
476 HANDLE_CALLBACK(error_report, tux_error_report, real_error_report, sk);
478 req->error = TUX_ERROR_CONN_CLOSE;
479 if (!idle_event(req))
480 output_space_event(req);
481 read_unlock(&sk->sk_callback_lock);
/*
 * tux_state_change - replacement sk_state_change: if the TCP state
 * left ESTABLISHED (peer closed/reset), flag the request with
 * TUX_ERROR_CONN_CLOSE, then wake it.
 */
484 static void tux_state_change (struct sock *sk)
486 HANDLE_CALLBACK(state_change, tux_state_change, real_state_change, sk);
488 if (req->sock && req->sock->sk &&
489 (req->sock->sk->sk_state > TCP_ESTABLISHED)) {
490 Dprintk("req %p changed to TCP non-established!\n", req);
491 Dprintk("req->sock: %p\n", req->sock);
493 Dprintk("req->sock->sk: %p\n", req->sock->sk);
494 if (req->sock && req->sock->sk)
495 Dprintk("TCP state: %d\n", req->sock->sk->sk_state);
496 Dprintk("req->error = TUX_ERROR_CONN_CLOSE!\n");
497 req->error = TUX_ERROR_CONN_CLOSE;
499 if (!idle_event(req))
500 output_space_event(req);
501 read_unlock(&sk->sk_callback_lock);
/*
 * tux_destruct - replacement sk_destruct marker for TUX-owned sockets;
 * its identity is compared against sk->sk_destruct in link/unlink as a
 * double-link sanity check. Body not visible in this extract.
 */
504 static void tux_destruct (struct sock *sk)
/*
 * tux_ftp_data_ready - sk_data_ready trampoline for the FTP data
 * socket; wakes the owning request, chaining through
 * req->ftp_real_data_ready.
 */
509 static void tux_ftp_data_ready (struct sock *sk, int len)
511 HANDLE_CALLBACK_1(data_ready, tux_ftp_data_ready,
512 ftp_real_data_ready, sk, len);
513 if (!idle_event(req))
514 output_space_event(req);
515 read_unlock(&sk->sk_callback_lock);
/*
 * tux_ftp_write_space - sk_write_space trampoline for the FTP data
 * socket. Uses a different wakeup threshold than the HTTP path:
 * 80% of sk_sndbuf free (sndbuf/10*8) instead of min wspace.
 */
518 static void tux_ftp_write_space (struct sock *sk)
520 HANDLE_CALLBACK_1(write_space, tux_ftp_write_space,
521 ftp_real_write_space, sk);
523 Dprintk("sk->sk_wmem_queued: %d, sk->sk_sndbuf: %d.\n",
524 sk->sk_wmem_queued, sk->sk_sndbuf);
526 if (sk_stream_wspace(sk) >= sk->sk_sndbuf/10*8) {
527 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
528 if (!idle_event(req))
529 output_space_event(req);
531 read_unlock(&sk->sk_callback_lock);
/*
 * tux_ftp_error_report - sk_error_report trampoline for the FTP data
 * socket: flag the request for closing and wake it.
 */
534 static void tux_ftp_error_report (struct sock *sk)
536 HANDLE_CALLBACK(error_report, tux_ftp_error_report,
537 ftp_real_error_report, sk);
539 TDprintk("req %p sock %p got TCP errors on FTP data connection!\n", req, sk);
540 TDprintk("req->error = TUX_ERROR_CONN_CLOSE!\n");
541 req->error = TUX_ERROR_CONN_CLOSE;
542 if (!idle_event(req))
543 output_space_event(req);
544 read_unlock(&sk->sk_callback_lock);
/*
 * tux_ftp_state_change - sk_state_change trampoline for the FTP
 * control socket: when it leaves TCP ESTABLISHED, flag the request
 * for closing and wake it.
 */
547 static void tux_ftp_state_change (struct sock *sk)
549 HANDLE_CALLBACK(state_change, tux_ftp_state_change,
550 ftp_real_state_change, sk);
552 if (req->sock && req->sock->sk &&
553 (req->sock->sk->sk_state > TCP_ESTABLISHED)) {
554 Dprintk("req %p FTP control sock changed to TCP non-established!\n", req);
555 Dprintk("req->sock: %p\n", req->sock);
556 TDprintk("req->error = TUX_ERROR_CONN_CLOSE!\n");
558 req->error = TUX_ERROR_CONN_CLOSE;
560 if (!idle_event(req))
561 output_space_event(req);
562 read_unlock(&sk->sk_callback_lock);
/*
 * tux_ftp_create_child - sk_create_child trampoline: a child socket
 * was spawned from a TUX FTP socket. The child must NOT inherit the
 * TUX hooks, so restore the saved original callbacks on it and clear
 * its user_data, then wake the owning request.
 */
565 static void tux_ftp_create_child (struct sock *sk, struct sock *newsk)
567 HANDLE_CALLBACK(create_child, tux_ftp_create_child,
568 ftp_real_create_child, sk, newsk);
570 newsk->sk_user_data = NULL;
571 newsk->sk_data_ready = req->ftp_real_data_ready;
572 newsk->sk_state_change = req->ftp_real_state_change;
573 newsk->sk_write_space = req->ftp_real_write_space;
574 newsk->sk_error_report = req->ftp_real_error_report;
575 newsk->sk_create_child = req->ftp_real_create_child;
576 newsk->sk_destruct = req->ftp_real_destruct;
578 if (!idle_event(req))
579 output_space_event(req);
580 read_unlock(&sk->sk_callback_lock);
/*
 * tux_ftp_destruct - sk_destruct marker for TUX FTP data sockets;
 * compared against sk->sk_destruct as a double-link sanity check.
 * Body not visible in this extract.
 */
583 static void tux_ftp_destruct (struct sock *sk)
/*
 * link_tux_socket - take over a newly accepted client socket for @req:
 * save the socket's original callbacks into the request, install the
 * TUX trampolines, record the peer's address/port and register the
 * request's wait-queue entry on the socket.
 *
 * The double-link guards (sk_destruct == tux_destruct before and after)
 * are sanity checks; their TUX_BUG bodies are not visible in this
 * extract.
 */
588 static void link_tux_socket (tux_req_t *req, struct socket *sock)
590 struct sock *sk = sock->sk;
594 if (sk->sk_destruct == tux_destruct)
597 * (No need to lock the socket, we just want to
598 * make sure that events from now on go through
601 write_lock_irq(&sk->sk_callback_lock);
604 sk->sk_user_data = req;
/* Save the original callbacks so they can be chained to and restored: */
606 req->real_data_ready = sk->sk_data_ready;
607 req->real_state_change = sk->sk_state_change;
608 req->real_write_space = sk->sk_write_space;
609 req->real_error_report = sk->sk_error_report;
610 req->real_destruct = sk->sk_destruct;
/* Install the TUX trampolines: */
612 sk->sk_data_ready = tux_data_ready;
613 sk->sk_state_change = tux_state_change;
614 sk->sk_write_space = tux_write_space;
615 sk->sk_error_report = tux_error_report;
616 sk->sk_destruct = tux_destruct;
618 write_unlock_irq(&sk->sk_callback_lock);
620 if (req->real_destruct == tux_destruct)
/* Record the client's address for logging/protocol use: */
622 req->client_addr = inet_sk(sk)->daddr;
623 req->client_port = inet_sk(sk)->dport;
625 add_wait_queue(sk->sk_sleep, &req->sleep);
/*
 * __link_data_socket - FTP variant of link_tux_socket(): attach @sock
 * as @req's data socket, saving the original callbacks into the
 * ftp_real_* slots and installing the tux_ftp_* trampolines
 * (including sk_create_child, which plain HTTP sockets do not hook).
 */
628 void __link_data_socket (tux_req_t *req, struct socket *sock,
632 * (No need to lock the socket, we just want to
633 * make sure that events from now on go through
636 write_lock_irq(&sk->sk_callback_lock);
638 req->data_sock = sock;
639 sk->sk_user_data = req;
/* Save the original callbacks: */
641 req->ftp_real_data_ready = sk->sk_data_ready;
642 req->ftp_real_state_change = sk->sk_state_change;
643 req->ftp_real_write_space = sk->sk_write_space;
644 req->ftp_real_error_report = sk->sk_error_report;
645 req->ftp_real_create_child = sk->sk_create_child;
646 req->ftp_real_destruct = sk->sk_destruct;
/* Install the TUX FTP trampolines: */
648 sk->sk_data_ready = tux_ftp_data_ready;
649 sk->sk_state_change = tux_ftp_state_change;
650 sk->sk_write_space = tux_ftp_write_space;
651 sk->sk_error_report = tux_ftp_error_report;
652 sk->sk_create_child = tux_ftp_create_child;
653 sk->sk_destruct = tux_ftp_destruct;
/* Sanity check: the socket must not have been TUX-linked already: */
655 if (req->ftp_real_destruct == tux_ftp_destruct)
658 write_unlock_irq(&sk->sk_callback_lock);
660 add_wait_queue(sk->sk_sleep, &req->ftp_sleep);
/*
 * link_tux_data_socket - public wrapper: double-link sanity check
 * (body of the guard not visible in this extract), then delegate to
 * __link_data_socket().
 */
663 void link_tux_data_socket (tux_req_t *req, struct socket *sock)
665 struct sock *sk = sock->sk;
669 if (sk->sk_destruct == tux_ftp_destruct)
671 __link_data_socket(req, sock, sk);
/*
 * unlink_tux_socket - undo link_tux_socket(): restore the socket's
 * original callbacks from the req->real_* slots, clear sk_user_data
 * and the saved slots, and remove the request's wait-queue entry.
 * No-op when the request holds no live socket.
 */
674 void unlink_tux_socket (tux_req_t *req)
678 if (!req->sock || !req->sock->sk)
682 write_lock_irq(&sk->sk_callback_lock);
683 if (!sk->sk_user_data)
/* Sanity: the saved destructor must not itself be the TUX hook: */
685 if (req->real_destruct == tux_destruct)
688 sk->sk_user_data = NULL;
/* Restore the original callbacks: */
690 sk->sk_data_ready = req->real_data_ready;
691 sk->sk_state_change = req->real_state_change;
692 sk->sk_write_space = req->real_write_space;
693 sk->sk_error_report = req->real_error_report;
694 sk->sk_destruct = req->real_destruct;
696 if (sk->sk_destruct == tux_destruct)
/* Clear the saved slots so stale pointers cannot be reused: */
699 req->real_data_ready = NULL;
700 req->real_state_change = NULL;
701 req->real_write_space = NULL;
702 req->real_error_report = NULL;
703 req->real_destruct = NULL;
705 write_unlock_irq(&sk->sk_callback_lock);
707 remove_wait_queue(sk->sk_sleep, &req->sleep);
/*
 * unlink_tux_data_socket - undo __link_data_socket() on the FTP data
 * socket: restore the original callbacks from the ftp_real_* slots,
 * clear them, and remove the ftp_sleep wait-queue entry.
 * NOTE(review): the guard at orig line 720 tests req->real_destruct,
 * not req->ftp_real_destruct — possibly intentional cross-check,
 * possibly a typo in the original; confirm against the full file
 * before changing.
 */
710 void unlink_tux_data_socket (tux_req_t *req)
714 if (!req->data_sock || !req->data_sock->sk)
716 sk = req->data_sock->sk;
718 write_lock_irq(&sk->sk_callback_lock);
720 if (req->real_destruct == tux_ftp_destruct)
723 sk->sk_user_data = NULL;
/* Restore the original callbacks: */
724 sk->sk_data_ready = req->ftp_real_data_ready;
725 sk->sk_state_change = req->ftp_real_state_change;
726 sk->sk_write_space = req->ftp_real_write_space;
727 sk->sk_error_report = req->ftp_real_error_report;
728 sk->sk_create_child = req->ftp_real_create_child;
729 sk->sk_destruct = req->ftp_real_destruct;
/* Clear the saved slots: */
731 req->ftp_real_data_ready = NULL;
732 req->ftp_real_state_change = NULL;
733 req->ftp_real_write_space = NULL;
734 req->ftp_real_error_report = NULL;
735 req->ftp_real_create_child = NULL;
736 req->ftp_real_destruct = NULL;
738 write_unlock_irq(&sk->sk_callback_lock);
740 if (sk->sk_destruct == tux_ftp_destruct)
743 remove_wait_queue(sk->sk_sleep, &req->ftp_sleep);
/*
 * add_tux_atom - push a work "atom" (continuation function) onto the
 * request's atom stack. Overflow past MAX_TUX_ATOMS is a bug (the
 * TUX_BUG body and the atom_idx increment are not visible in this
 * extract).
 */
746 void add_tux_atom (tux_req_t *req, atom_func_t *atom)
748 Dprintk("adding TUX atom %p to req %p, atom_idx: %d, at %p/%p.\n",
749 atom, req, req->atom_idx, __builtin_return_address(0), __builtin_return_address(1));
750 if (req->atom_idx == MAX_TUX_ATOMS)
752 req->atoms[req->atom_idx] = atom;
/*
 * del_tux_atom - pop the top atom off the request's atom stack (the
 * underflow check and atom_idx decrement are not visible in this
 * extract).
 */
756 void del_tux_atom (tux_req_t *req)
761 Dprintk("removing TUX atom %p to req %p, atom_idx: %d, at %p.\n",
762 req->atoms[req->atom_idx], req, req->atom_idx, __builtin_return_address(0));
/*
 * tux_schedule_atom - execute the request's current (topmost) atom.
 * @cachemiss: passed through to the atom, indicating whether this is
 *             the cachemiss (blocking-capable) pass.
 * A request still on a work list is a bug (guard body not visible in
 * this extract; presumably the index is adjusted around the call —
 * TODO confirm).
 */
765 void tux_schedule_atom (tux_req_t *req, int cachemiss)
767 if (!list_empty(&req->work))
772 Dprintk("DOING TUX atom %p, req %p, atom_idx: %d, at %p.\n",
773 req->atoms[req->atom_idx], req, req->atom_idx, __builtin_return_address(0));
775 req->atoms[req->atom_idx](req, cachemiss);
777 Dprintk("DONE TUX atom %p, req %p, atom_idx: %d, at %p.\n",
778 req->atoms[req->atom_idx], req, req->atom_idx, __builtin_return_address(0));
782 * Puts newly accepted connections into the inputqueue. This is the
783 * first step in the life of a TUX request.
785 int accept_requests (threadinfo_t *ti)
787 int count = 0, last_count = 0, error, socknr = 0;
788 struct socket *sock, *new_sock;
789 struct tcp_opt *tp1, *tp2;
792 if (ti->nr_requests > tux_max_connect)
796 for (socknr = 0; socknr < CONFIG_TUX_NUMSOCKETS; socknr++) {
797 tux_listen_t *tux_listen;
799 tux_listen = ti->listen + socknr;
800 sock = tux_listen->sock;
803 if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
806 tp1 = tcp_sk(sock->sk);
808 * Quick test to see if there are connections on the queue.
809 * This is cheaper than accept() itself because this saves us
810 * the allocation of a new socket. (Which doesn't seem to be
813 if (tp1->accept_queue) {
817 __set_task_state(current, TASK_RUNNING);
819 new_sock = sock_alloc();
823 new_sock->type = sock->type;
824 new_sock->ops = sock->ops;
826 error = sock->ops->accept(sock, new_sock, O_NONBLOCK);
829 if (new_sock->sk->sk_state != TCP_ESTABLISHED)
832 tp2 = tcp_sk(new_sock->sk);
834 tp2->ack.pingpong = tux_ack_pingpong;
835 new_sock->sk->sk_reuse = 1;
836 sock_set_flag(new_sock->sk, SOCK_URGINLINE);
838 /* Allocate a request-entry for the connection */
839 req = kmalloc_req(ti);
842 link_tux_socket(req, new_sock);
844 proto = req->proto = tux_listen->proto;
846 proto->got_request(req);
849 if (count != last_count) {
856 sock_release(new_sock);