#include <linux/fs.h>
#include <linux/random.h>
+#ifdef CONFIG_CKRM
+#include <linux/ckrm.h>
+#endif
+
#include <net/icmp.h>
#include <net/tcp.h>
#include <net/xfrm.h>
void tcp_enter_memory_pressure(void)
{
if (!tcp_memory_pressure) {
- NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES);
+ NET_INC_STATS(TCPMemoryPressures);
tcp_memory_pressure = 1;
}
}
int tcp_listen_start(struct sock *sk)
{
+#ifdef CONFIG_ACCEPT_QUEUES
+ int i = 0;
+#endif
struct inet_opt *inet = inet_sk(sk);
struct tcp_opt *tp = tcp_sk(sk);
struct tcp_listen_opt *lopt;
sk->sk_max_ack_backlog = 0;
sk->sk_ack_backlog = 0;
+#ifdef CONFIG_ACCEPT_QUEUES
+ tp->accept_queue = NULL;
+#else
tp->accept_queue = tp->accept_queue_tail = NULL;
+#endif
tp->syn_wait_lock = RW_LOCK_UNLOCKED;
tcp_delack_init(tp);
break;
get_random_bytes(&lopt->hash_rnd, 4);
+#ifdef CONFIG_ACCEPT_QUEUES
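+ /* Begin scheduling at class 0 and reset every per-class accept queue. */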
+ tp->class_index = 0;
+ for (i = 0; i < NUM_ACCEPT_QUEUES; i++) {
+ tp->acceptq[i].aq_tail = NULL;
+ tp->acceptq[i].aq_head = NULL;
+ tp->acceptq[i].aq_wait_time = 0;
+ tp->acceptq[i].aq_qcount = 0;
+ tp->acceptq[i].aq_count = 0;
+ tp->acceptq[i].aq_cnt = 0; /* round-robin burst counter */
+ if (i == 0)
+ tp->acceptq[i].aq_ratio = 1;
+ else
+ tp->acceptq[i].aq_ratio = 0;
+ }
+#endif
+
write_lock_bh(&tp->syn_wait_lock);
tp->listen_opt = lopt;
write_unlock_bh(&tp->syn_wait_lock);
sk_dst_reset(sk);
sk->sk_prot->hash(sk);
+#ifdef CONFIG_CKRM
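+ /* CKRM hook for a socket entering the listening state. */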
+ ckrm_cb_listen_start(sk);
+#endif
+
return 0;
}
write_lock_bh(&tp->syn_wait_lock);
tp->listen_opt = NULL;
write_unlock_bh(&tp->syn_wait_lock);
- tp->accept_queue = tp->accept_queue_tail = NULL;
+
+#ifdef CONFIG_CKRM
+ ckrm_cb_listen_stop(sk);
+#endif
+
+#ifdef CONFIG_ACCEPT_QUEUES
+ for (i = 0; i < NUM_ACCEPT_QUEUES; i++)
+ tp->acceptq[i].aq_head = tp->acceptq[i].aq_tail = NULL;
+#else
+ tp->accept_queue_tail = NULL;
+#endif
+ tp->accept_queue = NULL;
if (lopt->qlen) {
for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
local_bh_enable();
sock_put(child);
+#ifdef CONFIG_ACCEPT_QUEUES
+ sk_acceptq_removed(sk, req->acceptq_class);
+#else
sk_acceptq_removed(sk);
+#endif
tcp_openreq_fastfree(req);
}
BUG_TRAP(!sk->sk_ack_backlog);
struct sk_buff *skb;
struct tcp_opt *tp = tcp_sk(sk);
- NET_ADD_STATS_USER(LINUX_MIB_TCPPREQUEUED, skb_queue_len(&tp->ucopy.prequeue));
+ NET_ADD_STATS_USER(TCPPrequeued, skb_queue_len(&tp->ucopy.prequeue));
/* RX process wants to run with disabled BHs, though it is not
* necessary */
/* __ Restore normal policy in scheduler __ */
if ((chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
+ NET_ADD_STATS_USER(TCPDirectCopyFromBacklog, chunk);
len -= chunk;
copied += chunk;
}
tcp_prequeue_process(sk);
if ((chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+ NET_ADD_STATS_USER(TCPDirectCopyFromPrequeue, chunk);
len -= chunk;
copied += chunk;
}
tcp_prequeue_process(sk);
if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+ NET_ADD_STATS_USER(TCPDirectCopyFromPrequeue, chunk);
len -= chunk;
copied += chunk;
}
*/
if (data_was_unread) {
/* Unread data was tossed, zap the connection. */
- NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
+ NET_INC_STATS_USER(TCPAbortOnClose);
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_KERNEL);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
- NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS_USER(TCPAbortOnData);
} else if (tcp_close_state(sk)) {
/* We FIN if the application ate all the data before
* zapping the connection.
if (tp->linger2 < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
+ NET_INC_STATS_BH(TCPAbortOnLinger);
} else {
int tmo = tcp_fin_time(tp);
"sockets\n");
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+ NET_INC_STATS_BH(TCPAbortOnMemory);
}
}
atomic_inc(&tcp_orphan_count);
struct open_request *req;
struct sock *newsk;
int error;
+#ifdef CONFIG_ACCEPT_QUEUES
+ int prev_class = 0;
+ int first;
+#endif
lock_sock(sk);
/* Find already established connection */
if (!tp->accept_queue) {
long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
-
/* If this is a non blocking socket don't sleep */
error = -EAGAIN;
if (!timeo)
goto out;
}
+#ifndef CONFIG_ACCEPT_QUEUES
req = tp->accept_queue;
if ((tp->accept_queue = req->dl_next) == NULL)
tp->accept_queue_tail = NULL;
+ newsk = req->sk;
+ sk_acceptq_removed(sk);
+#else
+ first = tp->class_index;
+ /* We should always have a request queued here; the accept_queue
+ * was already checked for NULL above.
+ */
+ while (!tp->acceptq[first].aq_head) {
+ tp->acceptq[first].aq_cnt = 0;
+ first = (first + 1) & (NUM_ACCEPT_QUEUES - 1);
+ }
+ req = tp->acceptq[first].aq_head;
+ tp->acceptq[first].aq_qcount--;
+ tp->acceptq[first].aq_count++;
+ tp->acceptq[first].aq_wait_time += (jiffies - req->acceptq_time_stamp);
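+ /* Unlink req from the global accept_queue list: point the tail of
+ * the nearest lower non-empty class (if any) past it.
+ */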
+ for (prev_class = first - 1; prev_class >= 0; prev_class--)
+ if (tp->acceptq[prev_class].aq_tail)
+ break;
+ if (prev_class >= 0)
+ tp->acceptq[prev_class].aq_tail->dl_next = req->dl_next;
+ else
+ tp->accept_queue = req->dl_next;
+
+ if (req == tp->acceptq[first].aq_tail)
+ tp->acceptq[first].aq_head = tp->acceptq[first].aq_tail = NULL;
+ else
+ tp->acceptq[first].aq_head = req->dl_next;
+
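+ /* Once aq_ratio requests have been taken from this class, reset the
+ * burst counter and advance the round-robin index to the next class.
+ */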
+ if (++(tp->acceptq[first].aq_cnt) >= tp->acceptq[first].aq_ratio) {
+ tp->acceptq[first].aq_cnt = 0;
+ tp->class_index = (first + 1) & (NUM_ACCEPT_QUEUES - 1);
+ }
newsk = req->sk;
- sk_acceptq_removed(sk);
+ sk_acceptq_removed(sk, req->acceptq_class);
+#endif
tcp_openreq_fastfree(req);
BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
release_sock(sk);
return NULL;
}
+
/*
* Socket option code for TCP.
*/
}
}
break;
+
+#ifdef CONFIG_ACCEPT_QUEUES
+ case TCP_ACCEPTQ_SHARE:
+#ifdef CONFIG_CKRM
+ /* If CKRM is enabled the shares are set through rcfs;
+ * getting the shares will still succeed.
+ */
+ err = -EOPNOTSUPP;
+ break;
+#else
+ {
+ char share_wt[NUM_ACCEPT_QUEUES];
+ int i, j;
+ /* The socket lock is held here: report errors via err/break. */
+ if (sk->sk_state != TCP_LISTEN) {
+ err = -EOPNOTSUPP;
+ break;
+ }
+ /* Clamp optlen so it cannot overflow share_wt[]. */
+ if (optlen < 1) {
+ err = -EINVAL;
+ break;
+ }
+ if (optlen > NUM_ACCEPT_QUEUES)
+ optlen = NUM_ACCEPT_QUEUES;
+ memset(share_wt, 0, sizeof(share_wt));
+ if (copy_from_user(share_wt, optval, optlen)) {
+ err = -EFAULT;
+ break;
+ }
+ /* j becomes the smallest non-zero weight that was supplied. */
+ j = 0;
+ for (i = 0; i < NUM_ACCEPT_QUEUES; i++) {
+ if (share_wt[i]) {
+ if (!j || share_wt[i] < j)
+ j = share_wt[i];
+ } else
+ tp->acceptq[i].aq_ratio = 0;
+ }
+ if (j == 0) {
+ /* Class 0 is always valid. If no weight was
+ * specified, give class 0 a weight of 1.
+ */
+ share_wt[0] = 1;
+ j = 1;
+ }
+ for (i = 0; i < NUM_ACCEPT_QUEUES; i++) {
+ tp->acceptq[i].aq_ratio = share_wt[i] / j;
+ tp->acceptq[i].aq_cnt = 0;
+ }
+ }
+ break;
+#endif
+#endif
default:
err = -ENOPROTOOPT;
break;
case TCP_QUICKACK:
val = !tp->ack.pingpong;
break;
+
+#ifdef CONFIG_ACCEPT_QUEUES
+ case TCP_ACCEPTQ_SHARE:
+ {
+ struct tcp_acceptq_info tinfo[NUM_ACCEPT_QUEUES];
+ int i;
+
+ if (sk->sk_state != TCP_LISTEN)
+ return -EOPNOTSUPP;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ memset(tinfo, 0, sizeof(tinfo));
+
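+ /* Fill in a per-class snapshot of the accept-queue statistics. */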
+ for (i = 0; i < NUM_ACCEPT_QUEUES; i++) {
+ tinfo[i].acceptq_wait_time =
+ jiffies_to_msecs(tp->acceptq[i].aq_wait_time);
+ tinfo[i].acceptq_qcount = tp->acceptq[i].aq_qcount;
+ tinfo[i].acceptq_count = tp->acceptq[i].aq_count;
+ tinfo[i].acceptq_shares = tp->acceptq[i].aq_ratio;
+ }
+
+ len = min_t(unsigned int, len, sizeof(tinfo));
+ if (put_user(len, optlen))
+ return -EFAULT;
+
+ if (copy_to_user(optval, (char *)tinfo, len))
+ return -EFAULT;
+
+ return 0;
+ }
+ break;
+#endif
default:
return -ENOPROTOOPT;
};