1 commit 39e1cee3184d275fa3ec4122de39b90d0d8e9bf4
2 Author: root <root@rhel6.(none)>
3 Date: Thu Apr 29 19:59:33 2010 -0400
5 linux-2.6-522-iptables-connection-tagging.patch
7 diff --git a/include/linux/netfilter/xt_SETXID.h b/include/linux/netfilter/xt_SETXID.h
11 +++ b/include/linux/netfilter/xt_SETXID.h
13 +#ifndef _XT_SETXID_H_target
14 +#define _XT_SETXID_H_target
20 +struct xt_setxid_target_info_v2 {
25 +#endif /*_XT_SETXID_H_target*/
26 diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
27 index 5cf7270..95a5fde 100644
28 --- a/include/net/netfilter/nf_conntrack.h
29 +++ b/include/net/netfilter/nf_conntrack.h
30 @@ -119,6 +119,9 @@ struct nf_conn {
31 /* Storage reserved for other modules: */
32 union nf_conntrack_proto proto;
34 + /* PLANETLAB. VNET-specific */
35 + int xid[IP_CT_DIR_MAX];
38 struct nf_ct_ext *ext;
40 diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
41 index 634d14a..a2872f5 100644
42 --- a/net/netfilter/Kconfig
43 +++ b/net/netfilter/Kconfig
44 @@ -543,6 +543,13 @@ config NETFILTER_XT_MATCH_CLUSTER
45 If you say Y or M here, try `iptables -m cluster --help` for
48 +config NETFILTER_XT_TARGET_SETXID
49 + tristate '"SETXID" target support'
50 + depends on NETFILTER_XTABLES
52 + This option adds a `SETXID' target, which allows you to alter the
55 config NETFILTER_XT_MATCH_COMMENT
56 tristate '"comment" match support'
57 depends on NETFILTER_ADVANCED
58 diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
59 index 49f62ee..820655e 100644
60 --- a/net/netfilter/Makefile
61 +++ b/net/netfilter/Makefile
62 @@ -41,6 +41,7 @@ obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
63 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
66 +obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o
67 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
68 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
69 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
70 diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
71 index 1e1df20..144e131 100644
72 --- a/net/netfilter/nf_conntrack_core.c
73 +++ b/net/netfilter/nf_conntrack_core.c
74 @@ -673,6 +673,9 @@ init_conntrack(struct net *net,
75 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
76 &net->ct.unconfirmed);
78 + ct->xid[IP_CT_DIR_ORIGINAL] = -1;
79 + ct->xid[IP_CT_DIR_REPLY] = -1;
81 spin_unlock_bh(&nf_conntrack_lock);
84 diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
85 index 225f8d1..7513997 100644
86 --- a/net/netfilter/xt_MARK.c
87 +++ b/net/netfilter/xt_MARK.c
89 #include <linux/module.h>
90 #include <linux/skbuff.h>
93 #include <net/checksum.h>
94 +#include <net/route.h>
95 +#include <net/inet_hashtables.h>
96 +#include <net/net_namespace.h>
98 +#include <net/netfilter/nf_conntrack.h>
100 #include <linux/netfilter/x_tables.h>
101 #include <linux/netfilter/xt_MARK.h>
102 @@ -24,22 +30,267 @@ MODULE_DESCRIPTION("Xtables: packet mark modification");
103 MODULE_ALIAS("ipt_MARK");
104 MODULE_ALIAS("ip6t_MARK");
106 +DECLARE_PER_CPU(int, sknid_elevator);
108 +#define PEERCRED_SET(x) (((x) != 0) && ((x) != (unsigned int)-1)) /* true iff x is neither 0 nor (unsigned int)-1; x is evaluated twice, so pass no side effects */
110 +static inline u_int16_t get_dst_port(struct nf_conntrack_tuple *tuple)
112 + switch (tuple->dst.protonum) {
114 + /* XXX Truncate 32-bit GRE key to 16 bits */
115 + return tuple->dst.u.gre.key;
117 + /* Bind on ICMP echo ID */
118 + return tuple->src.u.icmp.id;
120 + return tuple->dst.u.tcp.port;
122 + return tuple->dst.u.udp.port;
124 + return tuple->dst.u.all;
128 +static inline u_int16_t get_src_port(struct nf_conntrack_tuple *tuple)
130 + switch (tuple->dst.protonum) {
132 + /* XXX Truncate 32-bit GRE key to 16 bits */
133 + return htons(ntohl(tuple->src.u.gre.key));
135 + /* Bind on ICMP echo ID */
136 + return tuple->src.u.icmp.id;
138 + return tuple->src.u.tcp.port;
140 + return tuple->src.u.udp.port;
142 + return tuple->src.u.all;
146 +static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
147 + __be16 sport, __be32 daddr, __be16 dport,
148 + int dif, struct hlist_head udptable[])
150 + struct sock *sk, *result = NULL;
151 + struct hlist_node *node;
152 + unsigned short hnum = ntohs(dport);
156 + sk_for_each_rcu(sk, node, &udptable[udp_hashfn(net, hnum)]) {
157 + struct inet_sock *inet = inet_sk(sk);
159 + if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
160 + !ipv6_only_sock(sk)) {
161 + int score = (sk->sk_family == PF_INET ? 1 : 0);
163 + if (inet->rcv_saddr) {
164 + if (inet->rcv_saddr != daddr)
168 + /* block non nx_info ips */
169 + if (!v4_addr_in_nx_info(sk->sk_nx_info,
170 + daddr, NXA_MASK_BIND))
174 + if (inet->daddr != saddr)
179 + if (inet->dport != sport)
183 + if (sk->sk_bound_dev_if) {
184 + if (sk->sk_bound_dev_if != dif)
191 + } else if (score > badness) {
207 mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
209 const struct xt_mark_tginfo2 *info = par->targinfo;
211 + enum ip_conntrack_info ctinfo;
212 + struct sock *connection_sk;
214 + struct nf_conn *ct;
215 + extern struct inet_hashinfo tcp_hashinfo;
216 + enum ip_conntrack_dir dir;
220 + u_int16_t proto, src_port;
224 + if (info->mark == ~0U) {
225 + // As of 2.6.27.39, Dec 8 2009,
226 + // NetNS + VNET = Trouble
227 + // Let's handle this as a special case
228 + struct net *net = dev_net(skb->dev);
229 + if (!net_eq(net, &init_net)) {
232 + return XT_CONTINUE;
236 + dif = ((struct rtable *)(skb->dst))->rt_iif;
238 + ct = nf_ct_get(skb, &ctinfo);
240 + goto out_mark_finish;
242 + dir = CTINFO2DIR(ctinfo);
243 + src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
244 + dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
245 + src_port = get_src_port(&ct->tuplehash[dir].tuple);
246 + proto = ct->tuplehash[dir].tuple.dst.protonum;
248 + ip = ct->tuplehash[dir].tuple.dst.u3.ip;
249 + port = get_dst_port(&ct->tuplehash[dir].tuple);
253 + /* The packet is marked, it's going out */
254 + ct->xid[0] = skb->mark;
256 + if (ct->xid[0] > 0)
258 + } else if (proto == 17) {
261 + sk = __udp4_lib_lookup(net, src_ip, src_port,
262 + ip, port, dif, udp_hash);
264 + if (sk && hooknum == NF_INET_LOCAL_IN)
269 + } else if (skb->mark > 0)
270 + /* The packet is marked, it's going out */
271 + ct->xid[0] = skb->mark;
272 + } else if (proto == 6) { /* TCP */
273 + int sockettype = 0; /* Established socket */
275 + /* Looks for an established socket or a listening
276 + socket corresponding to the 4-tuple, in that order.
277 + The order is important for Codemux connections
278 + to be handled properly */
280 + connection_sk = inet_lookup_established(net,
286 + if (!connection_sk) {
287 + connection_sk = inet_lookup_listener(net,
291 + sockettype = 1; /* Listening socket */
294 + if (connection_sk) {
295 + if (connection_sk->sk_state == TCP_TIME_WAIT) {
296 + inet_twsk_put(inet_twsk(connection_sk));
297 + goto out_mark_finish;
300 + /* The peercred is not set. We set it if the other side has an xid. */
302 + (connection_sk->sk_peercred.uid)
303 + && ct->xid[!dir] > 0 && (sockettype == 0)) {
304 + connection_sk->sk_peercred.gid =
305 + connection_sk->sk_peercred.uid =
309 + /* The peercred is set, and is not equal to the XID of 'the other side' */
310 + else if (PEERCRED_SET
311 + (connection_sk->sk_peercred.uid)
312 + && (connection_sk->sk_peercred.uid !=
314 + && (sockettype == 0)) {
315 + mark = connection_sk->sk_peercred.uid;
318 + /* Has this connection already been tagged? */
319 + if (ct->xid[dir] < 1) {
320 + /* No - let's tag it */
321 + ct->xid[dir] = connection_sk->sk_nid;
324 + if (mark == -1 && (ct->xid[dir] != 0))
325 + mark = ct->xid[dir];
327 + sock_put(connection_sk);
330 + /* All else failed. Is this a connection over raw sockets?
331 + That explains why we couldn't get anything out of skb->sk,
332 + or look up a "real" connection. */
333 + if (ct->xid[dir] < 1) {
335 + ct->xid[dir] = skb->skb_tag;
338 + /* Covers CoDemux case */
339 + if (mark < 1 && (ct->xid[dir] > 0))
340 + mark = ct->xid[dir];
342 + if (mark < 1 && (ct->xid[!dir] > 0))
343 + mark = ct->xid[!dir];
344 + goto out_mark_finish;
347 + mark = (skb->mark & ~info->mask) ^ info->mark;
353 + curtag = &__get_cpu_var(sknid_elevator);
354 + if (mark > 0 && *curtag == -2 && hooknum == NF_INET_LOCAL_IN)
357 - skb->mark = (skb->mark & ~info->mask) ^ info->mark;
361 static struct xt_target mark_tg_reg __read_mostly = {
364 - .family = NFPROTO_UNSPEC,
366 - .targetsize = sizeof(struct xt_mark_tginfo2),
370 + .family = NFPROTO_UNSPEC,
372 + .targetsize = sizeof(struct xt_mark_tginfo2),
376 static int __init mark_tg_init(void)
377 diff --git a/net/netfilter/xt_SETXID.c b/net/netfilter/xt_SETXID.c
379 index 0000000..f8553c5
381 +++ b/net/netfilter/xt_SETXID.c
383 +#include <linux/module.h>
384 +#include <linux/skbuff.h>
385 +#include <linux/ip.h>
386 +#include <net/checksum.h>
387 +#include <linux/vs_network.h>
389 +#include <linux/netfilter/x_tables.h>
390 +#include <linux/netfilter/xt_SETXID.h>
392 +MODULE_LICENSE("GPL");
394 +MODULE_DESCRIPTION("Xtables: packet xid tag modification"); /* was empty; SETXID writes the rule's value into skb_tag */
395 +MODULE_ALIAS("ipt_SETXID");
398 +target_v2(struct sk_buff **pskb,
399 + const struct net_device *in,
400 + const struct net_device *out,
401 + unsigned int hooknum,
402 + const struct xt_target *target, const void *targinfo)
404 + const struct xt_setxid_target_info_v2 *setxidinfo = targinfo;
406 + switch (setxidinfo->mode) {
407 + case XT_SET_PACKET_XID:
408 + (*pskb)->skb_tag = setxidinfo->mark;
411 + return XT_CONTINUE;
415 +checkentry_v2(const char *tablename,
417 + const struct xt_target *target,
418 + void *targinfo, unsigned int hook_mask)
420 + struct xt_setxid_target_info_v2 *setxidinfo = targinfo;
422 + if (setxidinfo->mode != XT_SET_PACKET_XID) {
423 + printk(KERN_WARNING "SETXID: unknown mode %u\n",
431 +static struct xt_target xt_setxid_target[] = {
436 + .checkentry = checkentry_v2,
437 + .target = target_v2,
438 + .targetsize = sizeof(struct xt_setxid_target_info_v2),
444 +static int __init init(void)
449 + xt_register_target(xt_setxid_target);
453 +static void __exit fini(void)
455 + xt_unregister_target(xt_setxid_target);