From 48f9cfefbb82dad581bdd45d9a316e2ec1a43e6b Mon Sep 17 00:00:00 2001 From: Sapan Bhatia Date: Sun, 14 Sep 2008 20:47:39 +0000 Subject: [PATCH] Fix for the recently reported accounting problem: Ticket #396 and Ticket #202. --- ...-2.6-522-iptables-connection-tagging.patch | 258 +++++++----------- 1 file changed, 94 insertions(+), 164 deletions(-) diff --git a/linux-2.6-522-iptables-connection-tagging.patch b/linux-2.6-522-iptables-connection-tagging.patch index 151337943..881c4350c 100644 --- a/linux-2.6-522-iptables-connection-tagging.patch +++ b/linux-2.6-522-iptables-connection-tagging.patch @@ -110,8 +110,8 @@ diff -Nurb linux-2.6.22-521/net/netfilter/nf_conntrack_core.c linux-2.6.22-522/n diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilter/xt_MARK.c --- linux-2.6.22-521/net/netfilter/xt_MARK.c 2007-07-08 19:32:17.000000000 -0400 -+++ linux-2.6.22-522/net/netfilter/xt_MARK.c 2008-08-04 16:44:16.000000000 -0400 -@@ -5,13 +5,19 @@ ++++ linux-2.6.22-522/net/netfilter/xt_MARK.c 2008-09-14 16:50:22.000000000 -0400 +@@ -5,13 +5,18 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -122,7 +122,6 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt +#include #include #include -+#include #include +#include +#include @@ -131,10 +130,12 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt #include #include -@@ -21,6 +27,48 @@ +@@ -21,6 +26,50 @@ MODULE_ALIAS("ipt_MARK"); MODULE_ALIAS("ip6t_MARK"); ++#define PEERCRED_SET(x) ((x!=0) && (x!=(unsigned int)-1)) ++ +static inline u_int16_t +get_dst_port(struct nf_conntrack_tuple *tuple) +{ @@ -180,75 +181,18 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt static unsigned int target_v0(struct sk_buff **pskb, const struct net_device *in, -@@ -35,6 +83,67 @@ +@@ -35,6 +84,10 @@ return XT_CONTINUE; } +extern DEFINE_PER_CPU(int, sknid_elevator); -+static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, -+ __be32 daddr, __be16 dport, -+ int dif, struct hlist_head udptable[]) -+{ -+ struct sock *sk, *result = NULL; -+ struct hlist_node *node; -+ unsigned short hnum = ntohs(dport); -+ int badness = -1; -+ -+ read_lock(&udp_hash_lock); -+ -+ sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { -+ struct inet_sock *inet = inet_sk(sk); -+ -+ if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { -+ int score = (sk->sk_family == PF_INET ? 1 : 0); -+ -+ if (inet->rcv_saddr) { -+ if (inet->rcv_saddr != daddr) -+ continue; -+ score+=2; -+ } else { -+ /* block non nx_info ips */ -+ if (!v4_addr_in_nx_info(sk->sk_nx_info, -+ daddr, NXA_MASK_BIND)) -+ continue; -+ } -+ if (inet->daddr) { -+ if (inet->daddr != saddr) -+ continue; -+ score+=2; -+ } -+ if (inet->dport) { -+ if (inet->dport != sport) -+ continue; -+ score+=2; -+ } -+ if (sk->sk_bound_dev_if) { -+ if (sk->sk_bound_dev_if != dif) -+ continue; -+ score+=2; -+ } -+ if (score == 9) { -+ result = sk; -+ break; -+ } else if (score > badness) { -+ result = sk; -+ badness = score; -+ } -+ } -+ } -+ -+ if (result) -+ sock_hold(result); -+ read_unlock(&udp_hash_lock); -+ return result; -+} + +#define related(ct) (ct==(IP_CT_IS_REPLY + IP_CT_RELATED)) + static unsigned int target_v1(struct sk_buff **pskb, const struct net_device *in, -@@ -44,7 +153,20 @@ +@@ -44,7 +97,20 @@ const void *targinfo) { const struct xt_mark_target_info_v1 *markinfo = targinfo; @@ -270,112 +214,98 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt switch (markinfo->mode) { case XT_MARK_SET: -@@ -58,13 +180,121 @@ +@@ -58,13 +124,107 @@ case XT_MARK_OR: mark = (*pskb)->mark | markinfo->mark; break; + -+ case XT_MARK_COPYXID: -+ dif = ((struct rtable *)(*pskb)->dst)->rt_iif; -+ -+ ct = nf_ct_get((*pskb), &ctinfo); -+ if (!ct) -+ break; -+ -+ dir = CTINFO2DIR(ctinfo); -+ src_ip = ct->tuplehash[dir].tuple.src.u3.ip; -+ dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip; -+ src_port = get_src_port(&ct->tuplehash[dir].tuple); -+ proto = ct->tuplehash[dir].tuple.dst.protonum; -+ -+ ip = ct->tuplehash[dir].tuple.dst.u3.ip; -+ port = get_dst_port(&ct->tuplehash[dir].tuple); -+ -+ if (proto == 1) { -+ if ((*pskb)->mark>0) /* The packet is marked, it's going out */ -+ { -+ ct->xid[0]=(*pskb)->mark; -+ } -+ -+ if (ct->xid[0] > 0) { -+ mark = ct->xid[0]; -+ } -+ } -+ else if (proto == 17) { -+ struct sock *sk; -+ if (!(*pskb)->mark) { -+ sk = __udp4_lib_lookup(src_ip, src_port, ip, port, -+ dif, udp_hash); -+ -+ if (sk && hooknum==NF_IP_LOCAL_IN) { -+ mark=sk->sk_nid; -+ } -+ -+ if (sk) { -+ sock_put(sk); -+ } -+ } -+ else -+ if ((*pskb)->mark>0) /* The packet is marked, it's going out */ -+ { -+ ct->xid[0]=(*pskb)->mark; -+ } -+ } -+ else if (proto == 6) { -+ if ((*pskb)->sk) { -+ /* It's a listening socket */ -+ connection_sk = (*pskb)->sk; -+ sock_hold(connection_sk); -+ } -+ else /* It might be a connected socket */ -+ connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif); -+ -+ -+ if (connection_sk /* Well, some kind of TCP socket */) { -+ if (connection_sk->sk_peercred.uid == 0 || connection_sk->sk_peercred.uid == (__u32) -1) { -+ /* Normal case - the peercred on the socket is not set */ -+ connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[!dir]; -+ } -+ else /* Exceptional case - the peercred was set using SET_PEERCRED. Somebody wants us -+ to mark packets with some arbitrary value.*/ -+ mark=connection_sk->sk_peercred.uid; -+ -+ /* Has this connection already been tagged? */ -+ if (ct->xid[dir] < 1) { -+ /* No - let's tag it */ -+ ct->xid[dir]=connection_sk->sk_nid; -+ } -+ -+ if (mark==-1 && (connection_sk->sk_nid != 0)) -+ mark = ct->xid[dir]; -+ -+ -+ if (connection_sk->sk_state == TCP_TIME_WAIT) { -+ inet_twsk_put(inet_twsk(connection_sk)); -+ break; -+ } -+ else -+ sock_put(connection_sk); -+ } -+ -+ /* Covers CoDemux case */ -+ if (mark < 1 && (ct->xid[dir]>0)) { -+ mark = ct->xid[dir]; ++ case XT_MARK_COPYXID: ++ dif = ((struct rtable *)(*pskb)->dst)->rt_iif; ++ ++ ct = nf_ct_get((*pskb), &ctinfo); ++ if (!ct) ++ break; ++ ++ dir = CTINFO2DIR(ctinfo); ++ src_ip = ct->tuplehash[dir].tuple.src.u3.ip; ++ dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip; ++ src_port = get_src_port(&ct->tuplehash[dir].tuple); ++ proto = ct->tuplehash[dir].tuple.dst.protonum; ++ ++ ip = ct->tuplehash[dir].tuple.dst.u3.ip; ++ port = get_dst_port(&ct->tuplehash[dir].tuple); ++ ++ if (proto == 1 || proto == 17) { ++ if ((*pskb)->mark>0) /* The packet is marked, it's going out */ ++ { ++ ct->xid[0]=(*pskb)->mark; } -+ if (mark < 1 && (ct->xid[!dir]>0)) { -+ mark = ct->xid[!dir]; -+ } -+ -+ /* All else failed. Is this a connection over raw sockets? That explains -+ * why we couldn't get anything out of skb->sk, or look up a "real" connection.*/ -+ if (ct->xid[dir]<1) { -+ if ((*pskb)->skb_tag) { -+ ct->xid[dir]=(*pskb)->skb_tag; -+ } -+ } -+ } -+ break; ++ if (ct->xid[0] > 0) { ++ mark = ct->xid[0]; ++ } ++ ++ } ++ else if (proto == 6) /* TCP */{ ++ int sockettype=0; /* Established socket */ ++ /* Looks for an established socket or a listening socket corresponding to the 4-tuple, in ++ * that order. The order is important for Codemux connections to be handled properly */ ++ ++ connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif); ++ ++ if (!connection_sk) { ++ connection_sk = inet_lookup_listener(&tcp_hashinfo, ip, port, dif); ++ sockettype=1; /* Listening socket */ ++ } ++ ++ if (connection_sk) { ++ /* The peercred is not set. We set it if the other side has an xid. */ ++ if (!PEERCRED_SET(connection_sk->sk_peercred.uid) ++ && ct->xid[!dir]>0 && (sockettype==0)) { ++ connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[!dir]; ++ } ++ ++ /* The peercred is set, and is not equal to the XID of 'the other side' */ ++ else if (PEERCRED_SET(connection_sk->sk_peercred.uid) && (connection_sk->sk_peercred.uid != ct->xid[!dir]) && (sockettype==0)) { ++ mark = connection_sk->sk_peercred.uid; ++ } ++ ++ /* Has this connection already been tagged? */ ++ if (ct->xid[dir] < 1) { ++ /* No - let's tag it */ ++ ct->xid[dir]=connection_sk->sk_nid; ++ ++ } ++ ++ if (mark==-1 && (ct->xid[dir]!= 0)) ++ mark = ct->xid[dir]; ++ ++ if (connection_sk->sk_state == TCP_TIME_WAIT) { ++ inet_twsk_put(inet_twsk(connection_sk)); ++ break; ++ } ++ else ++ sock_put(connection_sk); ++ } ++ ++ /* All else failed. Is this a connection over raw sockets? That explains ++ * why we couldn't get anything out of skb->sk, or look up a "real" connection.*/ ++ if (ct->xid[dir]<1) { ++ if ((*pskb)->skb_tag) { ++ ct->xid[dir]=(*pskb)->skb_tag; ++ } ++ } ++ ++ /* Covers CoDemux case */ ++ if (mark < 1 && (ct->xid[dir]>0)) { ++ mark = ct->xid[dir]; ++ } ++ ++ if (mark < 1 && (ct->xid[!dir]>0)) { ++ mark = ct->xid[!dir]; ++ } ++ break; ++ } + } + if (mark != -1) { (*pskb)->mark = mark; @@ -393,7 +323,7 @@ diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilt static int checkentry_v0(const char *tablename, const void *entry, -@@ -92,7 +322,8 @@ +@@ -92,7 +252,8 @@ if (markinfo->mode != XT_MARK_SET && markinfo->mode != XT_MARK_AND -- 2.43.0