1 From 1d251758b9daeb60cc5afa5ce5aa725cf5ca84a7 Mon Sep 17 00:00:00 2001
2 From: S.Çağlar Onur <caglar@cs.princeton.edu>
3 Date: Tue, 7 Dec 2010 11:06:57 -0500
4 Subject: [PATCH] linux-2.6-522-iptables-connection-tagging.patch
7 include/linux/netfilter/xt_SETXID.h | 13 ++
8 include/net/netfilter/nf_conntrack.h | 3 +
9 net/netfilter/Kconfig | 7 +
10 net/netfilter/Makefile | 1 +
11 net/netfilter/nf_conntrack_core.c | 3 +
12 net/netfilter/xt_MARK.c | 267 +++++++++++++++++++++++++++++++++-
13 net/netfilter/xt_SETXID.c | 77 ++++++++++
14 7 files changed, 364 insertions(+), 7 deletions(-)
15 create mode 100644 include/linux/netfilter/xt_SETXID.h
16 create mode 100644 net/netfilter/xt_SETXID.c
18 diff --git a/include/linux/netfilter/xt_SETXID.h b/include/linux/netfilter/xt_SETXID.h
20 index 0000000..235b9d6
22 +++ b/include/linux/netfilter/xt_SETXID.h
24 +#ifndef _XT_SETXID_H_target
25 +#define _XT_SETXID_H_target
31 +struct xt_setxid_target_info_v2 {
36 +#endif /*_XT_SETXID_H_target*/
37 diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
38 index 5cf7270..95a5fde 100644
39 --- a/include/net/netfilter/nf_conntrack.h
40 +++ b/include/net/netfilter/nf_conntrack.h
41 @@ -119,6 +119,9 @@ struct nf_conn {
42 /* Storage reserved for other modules: */
43 union nf_conntrack_proto proto;
45 + /* PLANETLAB. VNET-specific */
46 + int xid[IP_CT_DIR_MAX];
49 struct nf_ct_ext *ext;
51 diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
52 index 62cf66b..cb15f4a 100644
53 --- a/net/netfilter/Kconfig
54 +++ b/net/netfilter/Kconfig
55 @@ -537,6 +537,13 @@ config NETFILTER_XT_MATCH_CLUSTER
56 If you say Y or M here, try `iptables -m cluster --help` for
59 +config NETFILTER_XT_TARGET_SETXID
60 + tristate '"SETXID" target support'
61 + depends on NETFILTER_XTABLES
63 + This option adds a `SETXID' target, which allows you to alter the
66 config NETFILTER_XT_MATCH_COMMENT
67 tristate '"comment" match support'
68 depends on NETFILTER_ADVANCED
69 diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
70 index b9815a9..25e24c1 100644
71 --- a/net/netfilter/Makefile
72 +++ b/net/netfilter/Makefile
73 @@ -41,6 +41,7 @@ obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
74 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
77 +obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o
78 obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
79 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
80 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
81 diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
82 index 1374179..0f9464c 100644
83 --- a/net/netfilter/nf_conntrack_core.c
84 +++ b/net/netfilter/nf_conntrack_core.c
85 @@ -677,6 +677,9 @@ init_conntrack(struct net *net,
86 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
87 &net->ct.unconfirmed);
89 + ct->xid[IP_CT_DIR_ORIGINAL] = -1;
90 + ct->xid[IP_CT_DIR_REPLY] = -1;
92 spin_unlock_bh(&nf_conntrack_lock);
95 diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
96 index 225f8d1..6cb5101 100644
97 --- a/net/netfilter/xt_MARK.c
98 +++ b/net/netfilter/xt_MARK.c
100 #include <linux/module.h>
101 #include <linux/skbuff.h>
102 #include <linux/ip.h>
103 +#include <net/udp.h>
104 #include <net/checksum.h>
105 +#include <net/route.h>
106 +#include <net/inet_hashtables.h>
107 +#include <net/net_namespace.h>
109 +#include <net/netfilter/nf_conntrack.h>
111 #include <linux/netfilter/x_tables.h>
112 #include <linux/netfilter/xt_MARK.h>
113 @@ -24,22 +30,269 @@ MODULE_DESCRIPTION("Xtables: packet mark modification");
114 MODULE_ALIAS("ipt_MARK");
115 MODULE_ALIAS("ip6t_MARK");
117 +DECLARE_PER_CPU(int, sknid_elevator);
119 +#define PEERCRED_SET(x) (((x) != 0) && ((x) != (unsigned int)-1)) /* true when x is neither 0 nor (unsigned int)-1; argument parenthesized so expression arguments group correctly */
121 +static inline u_int16_t get_dst_port(struct nf_conntrack_tuple *tuple)
123 + switch (tuple->dst.protonum) {
125 + /* XXX Truncate 32-bit GRE key to 16 bits */
126 + return tuple->dst.u.gre.key;
128 + /* Bind on ICMP echo ID */
129 + return tuple->src.u.icmp.id;
131 + return tuple->dst.u.tcp.port;
133 + return tuple->dst.u.udp.port;
135 + return tuple->dst.u.all;
139 +static inline u_int16_t get_src_port(struct nf_conntrack_tuple *tuple)
141 + switch (tuple->dst.protonum) {
143 + /* XXX Truncate 32-bit GRE key to 16 bits */
144 + return htons(ntohl(tuple->src.u.gre.key));
146 + /* Bind on ICMP echo ID */
147 + return tuple->src.u.icmp.id;
149 + return tuple->src.u.tcp.port;
151 + return tuple->src.u.udp.port;
153 + return tuple->src.u.all;
157 +static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
158 + __be16 sport, __be32 daddr, __be16 dport,
159 + int dif, struct udp_table *udptable)
161 + struct sock *sk, *result = NULL;
162 + struct hlist_nulls_node *node;
163 + unsigned short hnum = ntohs(dport);
164 + unsigned int hash = udp_hashfn(net, hnum);
165 + struct udp_hslot *hslot = &udptable->hash[hash];
169 + sk_nulls_for_each_rcu(sk, node, &hslot->head) {
170 + struct inet_sock *inet = inet_sk(sk);
172 + if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
173 + !ipv6_only_sock(sk)) {
174 + int score = (sk->sk_family == PF_INET ? 1 : 0);
176 + if (inet->rcv_saddr) {
177 + if (inet->rcv_saddr != daddr)
181 + /* block non nx_info ips */
182 + if (!v4_addr_in_nx_info(sk->sk_nx_info,
183 + daddr, NXA_MASK_BIND))
187 + if (inet->daddr != saddr)
192 + if (inet->dport != sport)
196 + if (sk->sk_bound_dev_if) {
197 + if (sk->sk_bound_dev_if != dif)
204 + } else if (score > badness) {
220 mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
222 const struct xt_mark_tginfo2 *info = par->targinfo;
224 + enum ip_conntrack_info ctinfo;
225 + struct sock *connection_sk;
227 + struct nf_conn *ct;
228 + extern struct inet_hashinfo tcp_hashinfo;
229 + enum ip_conntrack_dir dir;
233 + u_int16_t proto, src_port;
237 + if (info->mark == ~0U) {
238 + // As of 2.6.27.39, Dec 8 2009,
239 + // NetNS + VNET = Trouble
240 + // Let's handle this as a special case
241 + struct net *net = dev_net(skb->dev);
242 + if (!net_eq(net, &init_net)) {
245 + return XT_CONTINUE;
249 + dif = ((struct rtable *)(skb_dst(skb)))->rt_iif;
251 + ct = nf_ct_get(skb, &ctinfo);
253 + goto out_mark_finish;
255 + dir = CTINFO2DIR(ctinfo);
256 + src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
257 + dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
258 + src_port = get_src_port(&ct->tuplehash[dir].tuple);
259 + proto = ct->tuplehash[dir].tuple.dst.protonum;
261 + ip = ct->tuplehash[dir].tuple.dst.u3.ip;
262 + port = get_dst_port(&ct->tuplehash[dir].tuple);
266 + /* The packet is marked, it's going out */
267 + ct->xid[0] = skb->mark;
269 + if (ct->xid[0] > 0)
271 + } else if (proto == 17) {
274 + sk = __udp4_lib_lookup(net, src_ip, src_port,
275 + ip, port, dif, &udp_table);
277 + if (sk && par->hooknum == NF_INET_LOCAL_IN)
282 + } else if (skb->mark > 0)
283 + /* The packet is marked, it's going out */
284 + ct->xid[0] = skb->mark;
285 + } else if (proto == 6) { /* TCP */
286 + int sockettype = 0; /* Established socket */
288 + /* Looks for an established socket or a listening
289 + socket corresponding to the 4-tuple, in that order.
290 + The order is important for Codemux connections
291 + to be handled properly */
293 + connection_sk = inet_lookup_established(net,
299 + if (!connection_sk) {
300 + connection_sk = inet_lookup_listener(net,
304 + sockettype = 1; /* Listening socket */
307 + if (connection_sk) {
308 + if (connection_sk->sk_state == TCP_TIME_WAIT) {
309 + inet_twsk_put(inet_twsk(connection_sk));
310 + goto out_mark_finish;
313 + /* The peercred is not set. We set it if the other side has an xid. */
315 + (connection_sk->sk_peercred.uid)
316 + && ct->xid[!dir] > 0 && (sockettype == 0)) {
317 + connection_sk->sk_peercred.gid =
318 + connection_sk->sk_peercred.uid =
322 + /* The peercred is set, and is not equal to the XID of 'the other side' */
323 + else if (PEERCRED_SET
324 + (connection_sk->sk_peercred.uid)
325 + && (connection_sk->sk_peercred.uid !=
327 + && (sockettype == 0)) {
328 + mark = connection_sk->sk_peercred.uid;
331 + /* Has this connection already been tagged? */
332 + if (ct->xid[dir] < 1) {
333 + /* No - let's tag it */
334 + ct->xid[dir] = connection_sk->sk_nid;
337 + if (mark == -1 && (ct->xid[dir] != 0))
338 + mark = ct->xid[dir];
340 + sock_put(connection_sk);
343 + /* All else failed. Is this a connection over raw sockets?
344 + That explains why we couldn't get anything out of skb->sk,
345 + or look up a "real" connection. */
346 + if (ct->xid[dir] < 1) {
348 + ct->xid[dir] = skb->skb_tag;
351 + /* Covers CoDemux case */
352 + if (mark < 1 && (ct->xid[dir] > 0))
353 + mark = ct->xid[dir];
355 + if (mark < 1 && (ct->xid[!dir] > 0))
356 + mark = ct->xid[!dir];
357 + goto out_mark_finish;
360 + mark = (skb->mark & ~info->mask) ^ info->mark;
366 + curtag = &__get_cpu_var(sknid_elevator);
367 + if (mark > 0 && *curtag == -2 && par->hooknum == NF_INET_LOCAL_IN)
370 - skb->mark = (skb->mark & ~info->mask) ^ info->mark;
374 static struct xt_target mark_tg_reg __read_mostly = {
377 - .family = NFPROTO_UNSPEC,
379 - .targetsize = sizeof(struct xt_mark_tginfo2),
383 + .family = NFPROTO_UNSPEC,
385 + .targetsize = sizeof(struct xt_mark_tginfo2),
389 static int __init mark_tg_init(void)
390 diff --git a/net/netfilter/xt_SETXID.c b/net/netfilter/xt_SETXID.c
392 index 0000000..f8553c5
394 +++ b/net/netfilter/xt_SETXID.c
396 +#include <linux/module.h>
397 +#include <linux/skbuff.h>
398 +#include <linux/ip.h>
399 +#include <net/checksum.h>
400 +#include <linux/vs_network.h>
402 +#include <linux/netfilter/x_tables.h>
403 +#include <linux/netfilter/xt_SETXID.h>
405 +MODULE_LICENSE("GPL");
407 +MODULE_DESCRIPTION("");
408 +MODULE_ALIAS("ipt_SETXID");
411 +target_v2(struct sk_buff *skb,
412 + const struct xt_target_param *par)
417 + const struct xt_setxid_target_info_v2 *setxidinfo = par->targinfo;
419 + switch (setxidinfo->mode) {
420 + case XT_SET_PACKET_XID:
421 + skb->skb_tag = setxidinfo->mark;
424 + return XT_CONTINUE;
428 +checkentry_v2(const struct xt_tgchk_param *par)
433 + struct xt_setxid_target_info_v2 *setxidinfo = par->targinfo;
435 + if (setxidinfo->mode != XT_SET_PACKET_XID) {
436 + printk(KERN_WARNING "SETXID: unknown mode %u\n",
444 +static struct xt_target xt_setxid_target[] = {
449 + .checkentry = checkentry_v2,
450 + .target = target_v2,
451 + .targetsize = sizeof(struct xt_setxid_target_info_v2),
457 +static int __init init(void)
462 + xt_register_target(xt_setxid_target);
466 +static void __exit fini(void)
468 + xt_unregister_target(xt_setxid_target);