vserver 1.9.5.x5
[linux-2.6.git] / net / ipv4 / tcp_diag.c
index 3e23c6d..608c425 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/random.h>
 #include <linux/cache.h>
 #include <linux/init.h>
+#include <linux/time.h>
 
 #include <net/icmp.h>
 #include <net/tcp.h>
 
 #include <linux/tcp_diag.h>
 
+struct tcpdiag_entry   /* endpoint description handed to tcpdiag_bc_run() */
+{
+       u32 *saddr;     /* local address words (IPv4 or IPv6 storage) */
+       u32 *daddr;     /* remote address words (IPv4 or IPv6 storage) */
+       u16 sport;      /* local port, filled from inet->num */
+       u16 dport;      /* remote port, already converted with ntohs() */
+       u16 family;     /* copy of sk->sk_family */
+       u16 userlocks;  /* copy of sk->sk_userlocks */
+};
+
 static struct sock *tcpnl;
 
 
@@ -41,66 +52,11 @@ static struct sock *tcpnl;
    rta->rta_len = rtalen;                   \
    RTA_DATA(rta); })
 
-/* Return information about state of tcp endpoint in API format. */
-void tcp_get_info(struct sock *sk, struct tcp_info *info)
-{
-       struct tcp_opt *tp = tcp_sk(sk);
-       u32 now = tcp_time_stamp;
-
-       memset(info, 0, sizeof(*info));
-
-       info->tcpi_state = sk->sk_state;
-       info->tcpi_ca_state = tp->ca_state;
-       info->tcpi_retransmits = tp->retransmits;
-       info->tcpi_probes = tp->probes_out;
-       info->tcpi_backoff = tp->backoff;
-
-       if (tp->tstamp_ok)
-               info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
-       if (tp->sack_ok)
-               info->tcpi_options |= TCPI_OPT_SACK;
-       if (tp->wscale_ok) {
-               info->tcpi_options |= TCPI_OPT_WSCALE;
-               info->tcpi_snd_wscale = tp->snd_wscale;
-               info->tcpi_rcv_wscale = tp->rcv_wscale;
-       } 
-
-       if (tp->ecn_flags&TCP_ECN_OK)
-               info->tcpi_options |= TCPI_OPT_ECN;
-
-       info->tcpi_rto = jiffies_to_usecs(tp->rto);
-       info->tcpi_ato = jiffies_to_usecs(tp->ack.ato);
-       info->tcpi_snd_mss = tp->mss_cache_std;
-       info->tcpi_rcv_mss = tp->ack.rcv_mss;
-
-       info->tcpi_unacked = tcp_get_pcount(&tp->packets_out);
-       info->tcpi_sacked = tcp_get_pcount(&tp->sacked_out);
-       info->tcpi_lost = tcp_get_pcount(&tp->lost_out);
-       info->tcpi_retrans = tcp_get_pcount(&tp->retrans_out);
-       info->tcpi_fackets = tcp_get_pcount(&tp->fackets_out);
-
-       info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
-       info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime);
-       info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
-
-       info->tcpi_pmtu = tp->pmtu_cookie;
-       info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
-       info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
-       info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
-       info->tcpi_snd_ssthresh = tp->snd_ssthresh;
-       info->tcpi_snd_cwnd = tp->snd_cwnd;
-       info->tcpi_advmss = tp->advmss;
-       info->tcpi_reordering = tp->reordering;
-
-       info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
-       info->tcpi_rcv_space = tp->rcvq_space.space;
-}
-
 static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
-                       int ext, u32 pid, u32 seq)
+                       int ext, u32 pid, u32 seq, u16 nlmsg_flags)
 {
-       struct inet_opt *inet = inet_sk(sk);
-       struct tcp_opt *tp = tcp_sk(sk);
+       struct inet_sock *inet = inet_sk(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
        struct tcpdiagmsg *r;
        struct nlmsghdr  *nlh;
        struct tcp_info  *info = NULL;
@@ -109,6 +65,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
        unsigned char    *b = skb->tail;
 
        nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
+       nlh->nlmsg_flags = nlmsg_flags;
        r = NLMSG_DATA(nlh);
        if (sk->sk_state != TCP_TIME_WAIT) {
                if (ext & (1<<(TCPDIAG_MEMINFO-1)))
@@ -146,7 +103,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
                r->tcpdiag_wqueue = 0;
                r->tcpdiag_uid = 0;
                r->tcpdiag_inode = 0;
-#ifdef CONFIG_IPV6
+#ifdef CONFIG_IP_TCPDIAG_IPV6
                if (r->tcpdiag_family == AF_INET6) {
                        ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
                                       &tw->tw_v6_rcv_saddr);
@@ -163,7 +120,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
        r->id.tcpdiag_src[0] = inet->rcv_saddr;
        r->id.tcpdiag_dst[0] = inet->daddr;
 
-#ifdef CONFIG_IPV6
+#ifdef CONFIG_IP_TCPDIAG_IPV6
        if (r->tcpdiag_family == AF_INET6) {
                struct ipv6_pinfo *np = inet6_sk(sk);
 
@@ -231,11 +188,19 @@ nlmsg_failure:
        return -1;
 }
 
-extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif);
-#ifdef CONFIG_IPV6
+extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport,
+                                 int dif);
+#ifdef CONFIG_IP_TCPDIAG_IPV6
 extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
                                  struct in6_addr *daddr, u16 dport,
                                  int dif);
+#else
+static inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
+                                        struct in6_addr *daddr, u16 dport,
+                                        int dif)
+{
+       return NULL;    /* fallback when CONFIG_IP_TCPDIAG_IPV6 is off: never finds a socket */
+}
 #endif
 
 static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
@@ -250,7 +215,7 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
                                   req->id.tcpdiag_src[0], req->id.tcpdiag_sport,
                                   req->id.tcpdiag_if);
        }
-#ifdef CONFIG_IPV6
+#ifdef CONFIG_IP_TCPDIAG_IPV6
        else if (req->tcpdiag_family == AF_INET6) {
                sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport,
                                   (struct in6_addr*)req->id.tcpdiag_src, req->id.tcpdiag_sport,
@@ -280,7 +245,7 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
 
        if (tcpdiag_fill(rep, sk, req->tcpdiag_ext,
                         NETLINK_CB(in_skb).pid,
-                        nlh->nlmsg_seq) <= 0)
+                        nlh->nlmsg_seq, 0) <= 0)
                BUG();
 
        err = netlink_unicast(tcpnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
@@ -324,11 +289,11 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits)
 }
 
 
-static int tcpdiag_bc_run(const void *bc, int len, struct sock *sk)
+static int tcpdiag_bc_run(const void *bc, int len,
+                         const struct tcpdiag_entry *entry)
 {
        while (len > 0) {
                int yes = 1;
-               struct inet_opt *inet = inet_sk(sk);
                const struct tcpdiag_bc_op *op = bc;
 
                switch (op->code) {
@@ -338,19 +303,19 @@ static int tcpdiag_bc_run(const void *bc, int len, struct sock *sk)
                        yes = 0;
                        break;
                case TCPDIAG_BC_S_GE:
-                       yes = inet->num >= op[1].no;
+                       yes = entry->sport >= op[1].no;
                        break;
                case TCPDIAG_BC_S_LE:
-                       yes = inet->num <= op[1].no;
+                       yes = entry->sport <= op[1].no; /* S_* ops test the source port */
                        break;
                case TCPDIAG_BC_D_GE:
-                       yes = ntohs(inet->dport) >= op[1].no;
+                       yes = entry->dport >= op[1].no;
                        break;
                case TCPDIAG_BC_D_LE:
-                       yes = ntohs(inet->dport) <= op[1].no;
+                       yes = entry->dport <= op[1].no;
                        break;
                case TCPDIAG_BC_AUTO:
-                       yes = !(sk->sk_userlocks & SOCK_BINDPORT_LOCK);
+                       yes = !(entry->userlocks & SOCK_BINDPORT_LOCK);
                        break;
                case TCPDIAG_BC_S_COND:
                case TCPDIAG_BC_D_COND:
@@ -360,7 +325,7 @@ static int tcpdiag_bc_run(const void *bc, int len, struct sock *sk)
 
                        if (cond->port != -1 &&
                            cond->port != (op->code == TCPDIAG_BC_S_COND ?
-                                            inet->num : ntohs(inet->dport))) {
+                                            entry->sport : entry->dport)) {
                                yes = 0;
                                break;
                        }
@@ -368,26 +333,14 @@ static int tcpdiag_bc_run(const void *bc, int len, struct sock *sk)
                        if (cond->prefix_len == 0)
                                break;
 
-#ifdef CONFIG_IPV6
-                       if (sk->sk_family == AF_INET6) {
-                               struct ipv6_pinfo *np = inet6_sk(sk);
-
-                               if (op->code == TCPDIAG_BC_S_COND)
-                                       addr = (u32*)&np->rcv_saddr;
-                               else
-                                       addr = (u32*)&np->daddr;
-                       } else
-#endif
-                       {
-                               if (op->code == TCPDIAG_BC_S_COND)
-                                       addr = &inet->rcv_saddr;
-                               else
-                                       addr = &inet->daddr;
-                       }
+                       if (op->code == TCPDIAG_BC_S_COND)
+                               addr = entry->saddr;
+                       else
+                               addr = entry->daddr;
 
                        if (bitstring_match(addr, cond->addr, cond->prefix_len))
                                break;
-                       if (sk->sk_family == AF_INET6 &&
+                       if (entry->family == AF_INET6 &&
                            cond->family == AF_INET) {
                                if (addr[0] == 0 && addr[1] == 0 &&
                                    addr[2] == htonl(0xffff) &&
@@ -466,16 +419,182 @@ static int tcpdiag_bc_audit(const void *bytecode, int bytecode_len)
        return len == 0 ? 0 : -EINVAL;
 }
 
+static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk,
+                            struct netlink_callback *cb)
+{
+       struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
+       /* Bytes beyond the request header are treated as a filter attribute. */
+       if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+               struct tcpdiag_entry entry;
+               struct rtattr *bc = (struct rtattr *)(r + 1);
+               struct inet_sock *inet = inet_sk(sk);
+               /* Describe the socket's endpoints for tcpdiag_bc_run(). */
+               entry.family = sk->sk_family;
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+               if (entry.family == AF_INET6) {
+                       struct ipv6_pinfo *np = inet6_sk(sk);
+
+                       entry.saddr = np->rcv_saddr.s6_addr32;
+                       entry.daddr = np->daddr.s6_addr32;
+               } else
+#endif
+               {
+                       entry.saddr = &inet->rcv_saddr;
+                       entry.daddr = &inet->daddr;
+               }
+               entry.sport = inet->num;
+               entry.dport = ntohs(inet->dport);
+               entry.userlocks = sk->sk_userlocks;
+               /* Filter rejected the socket: emit nothing, but return 0 (not an error). */
+               if (!tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry))
+                       return 0;
+       }
+       /* Hand the socket to tcpdiag_fill() with NLM_F_MULTI set; its result is returned as-is. */
+       return tcpdiag_fill(skb, sk, r->tcpdiag_ext, NETLINK_CB(cb->skb).pid,
+                           cb->nlh->nlmsg_seq, NLM_F_MULTI);
+}
+
+static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk,
+                           struct open_request *req,
+                           u32 pid, u32 seq)
+{
+       struct inet_sock *inet = inet_sk(sk);
+       unsigned char *b = skb->tail;
+       struct tcpdiagmsg *r;
+       struct nlmsghdr *nlh;
+       long tmo;
+       /* Append a TCP_SYN_RECV record for req to skb; returns skb->len, or -1 after trimming skb back. */
+       nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
+       nlh->nlmsg_flags = NLM_F_MULTI;
+       r = NLMSG_DATA(nlh);
+
+       r->tcpdiag_family = sk->sk_family;
+       r->tcpdiag_state = TCP_SYN_RECV;
+       r->tcpdiag_timer = 1;
+       r->tcpdiag_retrans = req->retrans;
+       /* Cookie is the req pointer; ">> 31 >> 1" avoids a shift by 32 when long is 32 bits. */
+       r->id.tcpdiag_if = sk->sk_bound_dev_if;
+       r->id.tcpdiag_cookie[0] = (u32)(unsigned long)req;
+       r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
+       /* Time left until req expires, clamped to zero if already past. */
+       tmo = req->expires - jiffies;
+       if (tmo < 0)
+               tmo = 0;
+       /* IPv4 addresses are stored first; the AF_INET6 branch below overwrites them. */
+       r->id.tcpdiag_sport = inet->sport;
+       r->id.tcpdiag_dport = req->rmt_port;
+       r->id.tcpdiag_src[0] = req->af.v4_req.loc_addr;
+       r->id.tcpdiag_dst[0] = req->af.v4_req.rmt_addr;
+       r->tcpdiag_expires = jiffies_to_msecs(tmo);
+       r->tcpdiag_rqueue = 0;
+       r->tcpdiag_wqueue = 0;
+       r->tcpdiag_uid = sock_i_uid(sk);
+       r->tcpdiag_inode = 0;
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+       if (r->tcpdiag_family == AF_INET6) {
+               ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src,
+                              &req->af.v6_req.loc_addr);
+               ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst,
+                              &req->af.v6_req.rmt_addr);
+       }
+#endif
+       nlh->nlmsg_len = skb->tail - b;
+
+       return skb->len;
+       /* NOTE(review): presumably reached from NLMSG_PUT when skb lacks room - confirm. */
+nlmsg_failure:
+       skb_trim(skb, b - skb->data);
+       return -1;
+}
+
+static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk,
+                            struct netlink_callback *cb)
+{
+       struct tcpdiag_entry entry;
+       struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
+       struct tcp_sock *tp = tcp_sk(sk);
+       struct tcp_listen_opt *lopt;
+       struct rtattr *bc = NULL;
+       struct inet_sock *inet = inet_sk(sk);
+       int j, s_j;
+       int reqnum, s_reqnum;
+       int err = 0;
+       /* Dump the open requests held in sk's listen_opt SYN table. */
+       s_j = cb->args[3];              /* saved as bucket index + 1 */
+       s_reqnum = cb->args[4];         /* position within that bucket */
+
+       if (s_j > 0)
+               s_j--;
+
+       entry.family = sk->sk_family;
+       /* The SYN table is walked with syn_wait_lock held for reading. */
+       read_lock_bh(&tp->syn_wait_lock);
+
+       lopt = tp->listen_opt;
+       if (!lopt || !lopt->qlen)
+               goto out;
+       /* Request longer than its header: the remainder is the filter attribute. */
+       if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) {
+               bc = (struct rtattr *)(r + 1);
+               entry.sport = inet->num;
+               entry.userlocks = sk->sk_userlocks;
+       }
+
+       for (j = s_j; j < TCP_SYNQ_HSIZE; j++) {
+               struct open_request *req, *head = lopt->syn_table[j];
+
+               reqnum = 0;
+               for (req = head; req; reqnum++, req = req->dl_next) {
+                       if (reqnum < s_reqnum)
+                               continue;
+                       if (r->id.tcpdiag_dport != req->rmt_port &&
+                           r->id.tcpdiag_dport)
+                               continue;
+                       /* Per-request filter fields; sport and userlocks were set once above. */
+                       if (bc) {
+                               entry.saddr =
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+                                       (entry.family == AF_INET6) ?
+                                       req->af.v6_req.loc_addr.s6_addr32 :
+#endif
+                                       &req->af.v4_req.loc_addr;
+                               entry.daddr = 
+#ifdef CONFIG_IP_TCPDIAG_IPV6
+                                       (entry.family == AF_INET6) ?
+                                       req->af.v6_req.rmt_addr.s6_addr32 :
+#endif
+                                       &req->af.v4_req.rmt_addr;
+                               entry.dport = ntohs(req->rmt_port);
+
+                               if (!tcpdiag_bc_run(RTA_DATA(bc),
+                                                   RTA_PAYLOAD(bc), &entry))
+                                       continue;
+                       }
+
+                       err = tcpdiag_fill_req(skb, sk, req,
+                                              NETLINK_CB(cb->skb).pid,
+                                              cb->nlh->nlmsg_seq);
+                       if (err < 0) {  /* fill failed: remember where to resume */
+                               cb->args[3] = j + 1;
+                               cb->args[4] = reqnum;
+                               goto out;
+                       }
+               }
+               /* Only the first bucket visited resumes mid-chain. */
+               s_reqnum = 0;
+       }
+
+out:
+       read_unlock_bh(&tp->syn_wait_lock);
+       /* 0 if nothing was filled, else the last tcpdiag_fill_req() result (< 0 on failure). */
+       return err;
+}
 
 static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
        int i, num;
        int s_i, s_num;
        struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
-       struct rtattr *bc = NULL;
-
-       if (cb->nlh->nlmsg_len > 4+NLMSG_SPACE(sizeof(struct tcpdiagreq)))
-               bc = (struct rtattr*)(r+1);
 
        s_i = cb->args[1];
        s_num = num = cb->args[2];
@@ -488,31 +607,50 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
                        struct sock *sk;
                        struct hlist_node *node;
 
-                       if (i > s_i)
-                               s_num = 0;
-
                        num = 0;
                        sk_for_each(sk, node, &tcp_listening_hash[i]) {
-                               struct inet_opt *inet = inet_sk(sk);
-                               if (num < s_num)
-                                       goto next_listen;
-                               if (!(r->tcpdiag_states&TCPF_LISTEN) ||
-                                   r->id.tcpdiag_dport)
-                                       goto next_listen;
+                               struct inet_sock *inet = inet_sk(sk);
+
+                               if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+                                       continue;
+
+                               if (num < s_num) {
+                                       num++;
+                                       continue;
+                               }
+
                                if (r->id.tcpdiag_sport != inet->sport &&
                                    r->id.tcpdiag_sport)
                                        goto next_listen;
-                               if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk))
+
+                               if (!(r->tcpdiag_states&TCPF_LISTEN) ||
+                                   r->id.tcpdiag_dport ||
+                                   cb->args[3] > 0)
+                                       goto syn_recv;
+
+                               if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
+                                       tcp_listen_unlock();
+                                       goto done;
+                               }
+
+syn_recv:
+                               if (!(r->tcpdiag_states&TCPF_SYN_RECV))
                                        goto next_listen;
-                               if (tcpdiag_fill(skb, sk, r->tcpdiag_ext,
-                                                NETLINK_CB(cb->skb).pid,
-                                                cb->nlh->nlmsg_seq) <= 0) {
+
+                               if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
                                        tcp_listen_unlock();
                                        goto done;
                                }
+
 next_listen:
+                               cb->args[3] = 0;
+                               cb->args[4] = 0;
                                ++num;
                        }
+
+                       s_num = 0;
+                       cb->args[3] = 0;
+                       cb->args[4] = 0;
                }
                tcp_listen_unlock();
 skip_listen_ht:
@@ -535,8 +673,10 @@ skip_listen_ht:
 
                num = 0;
                sk_for_each(sk, node, &head->chain) {
-                       struct inet_opt *inet = inet_sk(sk);
+                       struct inet_sock *inet = inet_sk(sk);
 
+                       if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+                               continue;
                        if (num < s_num)
                                goto next_normal;
                        if (!(r->tcpdiag_states & (1 << sk->sk_state)))
@@ -546,11 +686,7 @@ skip_listen_ht:
                                goto next_normal;
                        if (r->id.tcpdiag_dport != inet->dport && r->id.tcpdiag_dport)
                                goto next_normal;
-                       if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk))
-                               goto next_normal;
-                       if (tcpdiag_fill(skb, sk, r->tcpdiag_ext,
-                                        NETLINK_CB(cb->skb).pid,
-                                        cb->nlh->nlmsg_seq) <= 0) {
+                       if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
                                read_unlock_bh(&head->lock);
                                goto done;
                        }
@@ -561,8 +697,10 @@ next_normal:
                if (r->tcpdiag_states&TCPF_TIME_WAIT) {
                        sk_for_each(sk, node,
                                    &tcp_ehash[i + tcp_ehash_size].chain) {
-                               struct inet_opt *inet = inet_sk(sk);
+                               struct inet_sock *inet = inet_sk(sk);
 
+                               if (!vx_check(sk->sk_xid, VX_IDENT|VX_WATCH))
+                                       continue;
                                if (num < s_num)
                                        goto next_dying;
                                if (r->id.tcpdiag_sport != inet->sport &&
@@ -571,11 +709,7 @@ next_normal:
                                if (r->id.tcpdiag_dport != inet->dport &&
                                    r->id.tcpdiag_dport)
                                        goto next_dying;
-                               if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk))
-                                       goto next_dying;
-                               if (tcpdiag_fill(skb, sk, r->tcpdiag_ext,
-                                                NETLINK_CB(cb->skb).pid,
-                                                cb->nlh->nlmsg_seq) <= 0) {
+                               if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
                                        read_unlock_bh(&head->lock);
                                        goto done;
                                }
@@ -657,9 +791,19 @@ static void tcpdiag_rcv(struct sock *sk, int len)
        }
 }
 
-void __init tcpdiag_init(void)
+static int __init tcpdiag_init(void)
 {
        tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv);
        if (tcpnl == NULL)
-               panic("tcpdiag_init: Cannot create netlink socket.");
+               return -ENOMEM; /* report the failure to the caller instead of panicking */
+       return 0;
+}
+/* Exit hook: releases the netlink socket created by tcpdiag_init(). */
+static void __exit tcpdiag_exit(void)
+{
+       sock_release(tcpnl->sk_socket);
 }
+
+module_init(tcpdiag_init);
+module_exit(tcpdiag_exit);
+MODULE_LICENSE("GPL");