#include <linux/fs.h>
#include <linux/random.h>
+#ifdef CONFIG_CKRM
+#include <linux/ckrm.h>
+#endif
+
#include <net/icmp.h>
#include <net/tcp.h>
#include <net/xfrm.h>
void tcp_enter_memory_pressure(void)
{
if (!tcp_memory_pressure) {
- NET_INC_STATS(LINUX_MIB_TCPMEMORYPRESSURES);
+ NET_INC_STATS(TCPMemoryPressures);
tcp_memory_pressure = 1;
}
}
int tcp_listen_start(struct sock *sk)
{
+#ifdef CONFIG_ACCEPT_QUEUES
+ int i = 0;
+#endif
struct inet_opt *inet = inet_sk(sk);
struct tcp_opt *tp = tcp_sk(sk);
struct tcp_listen_opt *lopt;
sk->sk_max_ack_backlog = 0;
sk->sk_ack_backlog = 0;
+#ifdef CONFIG_ACCEPT_QUEUES
+ tp->accept_queue = NULL;
+#else
tp->accept_queue = tp->accept_queue_tail = NULL;
+#endif
tp->syn_wait_lock = RW_LOCK_UNLOCKED;
tcp_delack_init(tp);
break;
get_random_bytes(&lopt->hash_rnd, 4);
+#ifdef CONFIG_ACCEPT_QUEUES
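+ /* Begin scheduling at class 0 and reset every per-class accept queue. */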
+ tp->class_index = 0;
+ for (i = 0; i < NUM_ACCEPT_QUEUES; i++) {
+ tp->acceptq[i].aq_tail = NULL;
+ tp->acceptq[i].aq_head = NULL;
+ tp->acceptq[i].aq_wait_time = 0;
+ tp->acceptq[i].aq_qcount = 0;
+ tp->acceptq[i].aq_count = 0;
+ tp->acceptq[i].aq_cnt = 0; /* round-robin burst counter */
+ if (i == 0)
+ tp->acceptq[i].aq_ratio = 1;
+ else
+ tp->acceptq[i].aq_ratio = 0;
+ }
+#endif
+
write_lock_bh(&tp->syn_wait_lock);
tp->listen_opt = lopt;
write_unlock_bh(&tp->syn_wait_lock);
sk_dst_reset(sk);
sk->sk_prot->hash(sk);
+#ifdef CONFIG_CKRM
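+ /* CKRM hook for a socket entering the listening state. */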
+ ckrm_cb_listen_start(sk);
+#endif
+
return 0;
}
write_lock_bh(&tp->syn_wait_lock);
tp->listen_opt = NULL;
write_unlock_bh(&tp->syn_wait_lock);
- tp->accept_queue = tp->accept_queue_tail = NULL;
+
+#ifdef CONFIG_CKRM
+ ckrm_cb_listen_stop(sk);
+#endif
+
+#ifdef CONFIG_ACCEPT_QUEUES
+ for (i = 0; i < NUM_ACCEPT_QUEUES; i++)
+ tp->acceptq[i].aq_head = tp->acceptq[i].aq_tail = NULL;
+#else
+ tp->accept_queue_tail = NULL;
+#endif
+ tp->accept_queue = NULL;
if (lopt->qlen) {
for (i = 0; i < TCP_SYNQ_HSIZE; i++) {
local_bh_enable();
sock_put(child);
+#ifdef CONFIG_ACCEPT_QUEUES
+ sk_acceptq_removed(sk, req->acceptq_class);
+#else
sk_acceptq_removed(sk);
+#endif
tcp_openreq_fastfree(req);
}
BUG_TRAP(!sk->sk_ack_backlog);
struct sk_buff *skb;
struct tcp_opt *tp = tcp_sk(sk);
- NET_ADD_STATS_USER(LINUX_MIB_TCPPREQUEUED, skb_queue_len(&tp->ucopy.prequeue));
+ NET_ADD_STATS_USER(TCPPrequeued, skb_queue_len(&tp->ucopy.prequeue));
/* RX process wants to run with disabled BHs, though it is not
* necessary */
/* __ Restore normal policy in scheduler __ */
if ((chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
+ NET_ADD_STATS_USER(TCPDirectCopyFromBacklog, chunk);
len -= chunk;
copied += chunk;
}
tcp_prequeue_process(sk);
if ((chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+ NET_ADD_STATS_USER(TCPDirectCopyFromPrequeue, chunk);
len -= chunk;
copied += chunk;
}
tcp_prequeue_process(sk);
if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
- NET_ADD_STATS_USER(LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
+ NET_ADD_STATS_USER(TCPDirectCopyFromPrequeue, chunk);
len -= chunk;
copied += chunk;
}
*/
if (data_was_unread) {
/* Unread data was tossed, zap the connection. */
- NET_INC_STATS_USER(LINUX_MIB_TCPABORTONCLOSE);
+ NET_INC_STATS_USER(TCPAbortOnClose);
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_KERNEL);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
- NET_INC_STATS_USER(LINUX_MIB_TCPABORTONDATA);
+ NET_INC_STATS_USER(TCPAbortOnData);
} else if (tcp_close_state(sk)) {
/* We FIN if the application ate all the data before
* zapping the connection.
if (tp->linger2 < 0) {
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER);
+ NET_INC_STATS_BH(TCPAbortOnLinger);
} else {
int tmo = tcp_fin_time(tp);
"sockets\n");
tcp_set_state(sk, TCP_CLOSE);
tcp_send_active_reset(sk, GFP_ATOMIC);
- NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
+ NET_INC_STATS_BH(TCPAbortOnMemory);
}
}
atomic_inc(&tcp_orphan_count);
struct open_request *req;
struct sock *newsk;
int error;
+#ifdef CONFIG_ACCEPT_QUEUES
+ int prev_class = 0;
+ int first;
+#endif
lock_sock(sk);
/* Find already established connection */
if (!tp->accept_queue) {
long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
-
/* If this is a non blocking socket don't sleep */
error = -EAGAIN;
if (!timeo)
goto out;
}
+#ifndef CONFIG_ACCEPT_QUEUES
req = tp->accept_queue;
if ((tp->accept_queue = req->dl_next) == NULL)
tp->accept_queue_tail = NULL;
+ newsk = req->sk;
+ sk_acceptq_removed(sk);
+#else
+ first = tp->class_index;
+ /* We should always have a request queued here; the accept_queue
+ * was already checked for NULL above.
+ */
+ while (!tp->acceptq[first].aq_head) {
+ tp->acceptq[first].aq_cnt = 0;
+ first = (first + 1) & (NUM_ACCEPT_QUEUES - 1);
+ }
+ req = tp->acceptq[first].aq_head;
+ tp->acceptq[first].aq_qcount--;
+ tp->acceptq[first].aq_count++;
+ tp->acceptq[first].aq_wait_time += (jiffies - req->acceptq_time_stamp);
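+ /* Unlink req from the global accept_queue list: point the tail of
+ * the nearest lower non-empty class (if any) past it.
+ */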
+ for (prev_class = first - 1; prev_class >= 0; prev_class--)
+ if (tp->acceptq[prev_class].aq_tail)
+ break;
+ if (prev_class >= 0)
+ tp->acceptq[prev_class].aq_tail->dl_next = req->dl_next;
+ else
+ tp->accept_queue = req->dl_next;
+
+ if (req == tp->acceptq[first].aq_tail)
+ tp->acceptq[first].aq_head = tp->acceptq[first].aq_tail = NULL;
+ else
+ tp->acceptq[first].aq_head = req->dl_next;
+
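+ /* Once aq_ratio requests have been taken from this class, reset the
+ * burst counter and advance the round-robin index to the next class.
+ */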
+ if (++(tp->acceptq[first].aq_cnt) >= tp->acceptq[first].aq_ratio) {
+ tp->acceptq[first].aq_cnt = 0;
+ tp->class_index = (first + 1) & (NUM_ACCEPT_QUEUES - 1);
+ }
newsk = req->sk;
- sk_acceptq_removed(sk);
+ sk_acceptq_removed(sk, req->acceptq_class);
+#endif
tcp_openreq_fastfree(req);
BUG_TRAP(newsk->sk_state != TCP_SYN_RECV);
release_sock(sk);
return NULL;
}
+
/*
* Socket option code for TCP.
*/
}
}
break;
+
+#ifdef CONFIG_ACCEPT_QUEUES
+ case TCP_ACCEPTQ_SHARE:
+#ifdef CONFIG_CKRM
+ /* If CKRM is enabled the shares are set through rcfs;
+ * getting the shares will still succeed.
+ */
+ err = -EOPNOTSUPP;
+ break;
+#else
+ {
+ char share_wt[NUM_ACCEPT_QUEUES];
+ int i, j;
+ /* The socket lock is held here: report errors via err/break. */
+ if (sk->sk_state != TCP_LISTEN) {
+ err = -EOPNOTSUPP;
+ break;
+ }
+ /* Clamp optlen so it cannot overflow share_wt[]. */
+ if (optlen < 1) {
+ err = -EINVAL;
+ break;
+ }
+ if (optlen > NUM_ACCEPT_QUEUES)
+ optlen = NUM_ACCEPT_QUEUES;
+ memset(share_wt, 0, sizeof(share_wt));
+ if (copy_from_user(share_wt, optval, optlen)) {
+ err = -EFAULT;
+ break;
+ }
+ /* j becomes the smallest non-zero weight that was supplied. */
+ j = 0;
+ for (i = 0; i < NUM_ACCEPT_QUEUES; i++) {
+ if (share_wt[i]) {
+ if (!j || share_wt[i] < j)
+ j = share_wt[i];
+ } else
+ tp->acceptq[i].aq_ratio = 0;
+ }
+ if (j == 0) {
+ /* Class 0 is always valid. If no weight was
+ * specified, give class 0 a weight of 1.
+ */
+ share_wt[0] = 1;
+ j = 1;
+ }
+ for (i = 0; i < NUM_ACCEPT_QUEUES; i++) {
+ tp->acceptq[i].aq_ratio = share_wt[i] / j;
+ tp->acceptq[i].aq_cnt = 0;
+ }
+ }
+ break;
+#endif
+#endif
default:
err = -ENOPROTOOPT;
break;
case TCP_QUICKACK:
val = !tp->ack.pingpong;
break;
+
+#ifdef CONFIG_ACCEPT_QUEUES
+ case TCP_ACCEPTQ_SHARE:
+ {
+ struct tcp_acceptq_info tinfo[NUM_ACCEPT_QUEUES];
+ int i;
+
+ if (sk->sk_state != TCP_LISTEN)
+ return -EOPNOTSUPP;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ memset(tinfo, 0, sizeof(tinfo));
+
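+ /* Fill in a per-class snapshot of the accept-queue statistics. */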
+ for (i = 0; i < NUM_ACCEPT_QUEUES; i++) {
+ tinfo[i].acceptq_wait_time =
+ jiffies_to_msecs(tp->acceptq[i].aq_wait_time);
+ tinfo[i].acceptq_qcount = tp->acceptq[i].aq_qcount;
+ tinfo[i].acceptq_count = tp->acceptq[i].aq_count;
+ tinfo[i].acceptq_shares = tp->acceptq[i].aq_ratio;
+ }
+
+ len = min_t(unsigned int, len, sizeof(tinfo));
+ if (put_user(len, optlen))
+ return -EFAULT;
+
+ if (copy_to_user(optval, (char *)tinfo, len))
+ return -EFAULT;
+
+ return 0;
+ }
+ break;
+#endif
default:
return -ENOPROTOOPT;
};