Trying to fix the following warning: 'override: reassigning to symbol SYSFS_DEPRECATED_V2'
[linux-2.6.git] / linux-2.6-522-iptables-connection-tagging.patch
1 From 1d251758b9daeb60cc5afa5ce5aa725cf5ca84a7 Mon Sep 17 00:00:00 2001
2 From: S.Çağlar Onur <caglar@cs.princeton.edu>
3 Date: Tue, 7 Dec 2010 11:06:57 -0500
4 Subject: [PATCH] linux-2.6-522-iptables-connection-tagging.patch
5
6 ---
7  include/linux/netfilter/xt_SETXID.h  |   13 ++
8  include/net/netfilter/nf_conntrack.h |    3 +
9  net/netfilter/Kconfig                |    7 +
10  net/netfilter/Makefile               |    1 +
11  net/netfilter/nf_conntrack_core.c    |    3 +
12  net/netfilter/xt_MARK.c              |  267 +++++++++++++++++++++++++++++++++-
13  net/netfilter/xt_SETXID.c            |   77 ++++++++++
14  7 files changed, 364 insertions(+), 7 deletions(-)
15  create mode 100644 include/linux/netfilter/xt_SETXID.h
16  create mode 100644 net/netfilter/xt_SETXID.c
17
18 diff --git a/include/linux/netfilter/xt_SETXID.h b/include/linux/netfilter/xt_SETXID.h
19 new file mode 100644
20 index 0000000..235b9d6
21 --- /dev/null
22 +++ b/include/linux/netfilter/xt_SETXID.h
23 @@ -0,0 +1,13 @@
24 +#ifndef _XT_SETXID_H_target
25 +#define _XT_SETXID_H_target
26 +/* Values for xt_setxid_target_info_v2.mode. */
27 +enum {
28 +       XT_SET_PACKET_XID=0	/* copy .mark into the packet's skb_tag */
29 +};
30 +/* Per-rule payload (targinfo) of the revision 2 SETXID target. */
31 +struct xt_setxid_target_info_v2 {
32 +       unsigned long mark;	/* NOTE(review): width differs between 32- and 64-bit ABIs -- confirm compat handling */
33 +       u_int8_t mode;	/* one of the XT_SET_* values above */
34 +};
35 +
36 +#endif /*_XT_SETXID_H_target*/
37 diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
38 index 5cf7270..95a5fde 100644
39 --- a/include/net/netfilter/nf_conntrack.h
40 +++ b/include/net/netfilter/nf_conntrack.h
41 @@ -119,6 +119,9 @@ struct nf_conn {
42         /* Storage reserved for other modules: */
43         union nf_conntrack_proto proto;
44  
45 +       /* PLANETLAB. VNET-specific */
46 +       int xid[IP_CT_DIR_MAX];
47 +       
48         /* Extensions */
49         struct nf_ct_ext *ext;
50  #ifdef CONFIG_NET_NS
51 diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
52 index 62cf66b..cb15f4a 100644
53 --- a/net/netfilter/Kconfig
54 +++ b/net/netfilter/Kconfig
55 @@ -537,6 +537,13 @@ config NETFILTER_XT_MATCH_CLUSTER
56           If you say Y or M here, try `iptables -m cluster --help` for
57           more information.
58  
59 +config NETFILTER_XT_TARGET_SETXID
60 +       tristate '"SETXID" target support'
61 +       depends on NETFILTER_XTABLES
62 +       help
63 +         This option adds a `SETXID' target, which allows you to set the
64 +         xid tag of a packet.
65 +
66  config NETFILTER_XT_MATCH_COMMENT
67         tristate  '"comment" match support'
68         depends on NETFILTER_ADVANCED
69 diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
70 index b9815a9..25e24c1 100644
71 --- a/net/netfilter/Makefile
72 +++ b/net/netfilter/Makefile
73 @@ -41,6 +41,7 @@ obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
74  obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
75  
76  # targets
77 +obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o
78  obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
79  obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
80  obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
81 diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
82 index 1374179..0f9464c 100644
83 --- a/net/netfilter/nf_conntrack_core.c
84 +++ b/net/netfilter/nf_conntrack_core.c
85 @@ -677,6 +677,9 @@ init_conntrack(struct net *net,
86         hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
87                        &net->ct.unconfirmed);
88  
89 +       ct->xid[IP_CT_DIR_ORIGINAL] = -1;
90 +       ct->xid[IP_CT_DIR_REPLY] = -1;
91 +
92         spin_unlock_bh(&nf_conntrack_lock);
93  
94         if (exp) {
95 diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
96 index 225f8d1..6cb5101 100644
97 --- a/net/netfilter/xt_MARK.c
98 +++ b/net/netfilter/xt_MARK.c
99 @@ -13,7 +13,13 @@
100  #include <linux/module.h>
101  #include <linux/skbuff.h>
102  #include <linux/ip.h>
103 +#include <net/udp.h>
104  #include <net/checksum.h>
105 +#include <net/route.h>
106 +#include <net/inet_hashtables.h>
107 +#include <net/net_namespace.h>
108 +
109 +#include <net/netfilter/nf_conntrack.h>
110  
111  #include <linux/netfilter/x_tables.h>
112  #include <linux/netfilter/xt_MARK.h>
113 @@ -24,22 +30,269 @@ MODULE_DESCRIPTION("Xtables: packet mark modification");
114  MODULE_ALIAS("ipt_MARK");
115  MODULE_ALIAS("ip6t_MARK");
116  
117 +DECLARE_PER_CPU(int, sknid_elevator);
118 +
119 +#define PEERCRED_SET(x) (((x) != 0) && ((x) != (unsigned int)-1))	/* true when x is neither 0 nor (unsigned int)-1; argument parenthesised so expressions expand safely */
120 +/* Return the destination-side 16-bit identifier of @tuple, chosen by tuple->dst.protonum. */
121 +static inline u_int16_t get_dst_port(struct nf_conntrack_tuple *tuple)
122 +{
123 +       switch (tuple->dst.protonum) {
124 +       case IPPROTO_GRE:
125 +               /* XXX NOTE(review): key is returned as-is, without the htons(ntohl()) conversion get_src_port() applies -- confirm which is intended */
126 +               return tuple->dst.u.gre.key;
127 +       case IPPROTO_ICMP:
128 +               /* Bind on ICMP echo ID (read from the src side of the tuple) */
129 +               return tuple->src.u.icmp.id;
130 +       case IPPROTO_TCP:
131 +               return tuple->dst.u.tcp.port;
132 +       case IPPROTO_UDP:
133 +               return tuple->dst.u.udp.port;
134 +       default:
135 +               return tuple->dst.u.all;	/* any other protocol: generic 'all' field */
136 +       }
137 +}
138 +/* Return the source-side 16-bit identifier of @tuple, chosen by tuple->dst.protonum. */
139 +static inline u_int16_t get_src_port(struct nf_conntrack_tuple *tuple)
140 +{
141 +       switch (tuple->dst.protonum) {
142 +       case IPPROTO_GRE:
143 +               /* XXX Truncate 32-bit GRE key to 16 bits. NOTE(review): presumes a 32-bit key field; get_dst_port() does not convert -- verify against the tuple definition */
144 +               return htons(ntohl(tuple->src.u.gre.key));
145 +       case IPPROTO_ICMP:
146 +               /* Bind on ICMP echo ID */
147 +               return tuple->src.u.icmp.id;
148 +       case IPPROTO_TCP:
149 +               return tuple->src.u.tcp.port;
150 +       case IPPROTO_UDP:
151 +               return tuple->src.u.udp.port;
152 +       default:
153 +               return tuple->src.u.all;	/* any other protocol: generic 'all' field */
154 +       }
155 +}
156 +/* Scan the @udptable slot for @dport and return the best-scoring socket with a reference held, or NULL if none matches. */
157 +static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
158 +                                     __be16 sport, __be32 daddr, __be16 dport,
159 +                                     int dif, struct udp_table *udptable)
160 +{
161 +       struct sock *sk, *result = NULL;
162 +       struct hlist_nulls_node *node;
163 +       unsigned short hnum = ntohs(dport);
164 +       unsigned int hash = udp_hashfn(net, hnum);
165 +       struct udp_hslot *hslot = &udptable->hash[hash];
166 +       int badness = -1;	/* best score seen so far */
167 +
168 +       rcu_read_lock();
169 +       sk_nulls_for_each_rcu(sk, node, &hslot->head) {
170 +               struct inet_sock *inet = inet_sk(sk);
171 +
172 +               if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum &&
173 +                   !ipv6_only_sock(sk)) {
174 +                       int score = (sk->sk_family == PF_INET ? 1 : 0);	/* 1 point for a PF_INET socket */
175 +
176 +                       if (inet->rcv_saddr) {	/* each field the socket has set must match, and earns 2 points */
177 +                               if (inet->rcv_saddr != daddr)
178 +                                       continue;
179 +                               score += 2;
180 +                       } else {
181 +                               /* block non nx_info ips */
182 +                               if (!v4_addr_in_nx_info(sk->sk_nx_info,
183 +                                                       daddr, NXA_MASK_BIND))
184 +                                       continue;
185 +                       }
186 +                       if (inet->daddr) {
187 +                               if (inet->daddr != saddr)
188 +                                       continue;
189 +                               score += 2;
190 +                       }
191 +                       if (inet->dport) {
192 +                               if (inet->dport != sport)
193 +                                       continue;
194 +                               score += 2;
195 +                       }
196 +                       if (sk->sk_bound_dev_if) {
197 +                               if (sk->sk_bound_dev_if != dif)
198 +                                       continue;
199 +                               score += 2;
200 +                       }
201 +                       if (score == 9) {	/* maximum possible score (1 + 4 * 2): stop searching */
202 +                               result = sk;
203 +                               break;
204 +                       } else if (score > badness) {
205 +                               result = sk;
206 +                               badness = score;
207 +                       }
208 +               }
209 +       }
210 +
211 +       if (result)	/* NOTE(review): plain sock_hold() under RCU -- confirm the socket cannot already be on its way to being freed */
212 +               sock_hold(result);	/* caller releases it with sock_put() */
213 +       rcu_read_unlock();
214 +       return result;
215 +}
216 +
217 +static int onceonly = 1;	/* one-shot guard for the WARN_ON in mark_tg(); file-local so the generic name does not leak into the global namespace */
218 +/* MARK target handler. When info->mark is all-ones it runs the copy-xid path; otherwise it applies the mask/xor update. */
219  static unsigned int
220  mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
221  {
222         const struct xt_mark_tginfo2 *info = par->targinfo;
223 +       long mark = -1;	/* -1 means: leave skb->mark unchanged */
224 +       enum ip_conntrack_info ctinfo;
225 +       struct sock *connection_sk;
226 +       int dif;
227 +       struct nf_conn *ct;
228 +       extern struct inet_hashinfo tcp_hashinfo;
229 +       enum ip_conntrack_dir dir;
230 +       int *curtag;
231 +       u_int32_t src_ip;
232 +       u_int32_t dst_ip;
233 +       u_int16_t proto, src_port;
234 +       u_int32_t ip;
235 +       u_int16_t port;
236 +
237 +       if (info->mark == ~0U) {	/* all-ones mark selects the copy-xid path */
238 +               // As of 2.6.27.39, Dec 8 2009,
239 +               // NetNS + VNET = Trouble
240 +               // Let's handle this as a special case
241 +               struct net *net = dev_net(skb->dev);
242 +               if (!net_eq(net, &init_net)) {
243 +                       WARN_ON(onceonly);
244 +                       onceonly = 0;	/* so the warning fires only the first time */
245 +                       return XT_CONTINUE;
246 +               }
247 +
248 +               /* copy-xid */
249 +               dif = ((struct rtable *)(skb_dst(skb)))->rt_iif;	/* NOTE(review): assumes skb_dst(skb) is non-NULL at this hook -- confirm */
250 +
251 +               ct = nf_ct_get(skb, &ctinfo);
252 +               if (!ct)
253 +                       goto out_mark_finish;
254 +
255 +               dir = CTINFO2DIR(ctinfo);
256 +               src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
257 +               dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
258 +               src_port = get_src_port(&ct->tuplehash[dir].tuple);
259 +               proto = ct->tuplehash[dir].tuple.dst.protonum;
260 +
261 +               ip = ct->tuplehash[dir].tuple.dst.u3.ip;
262 +               port = get_dst_port(&ct->tuplehash[dir].tuple);
263 +
264 +               if (proto == 1) {	/* ICMP */
265 +                       if (skb->mark > 0)
266 +                               /* The packet is marked, it's going out */
267 +                               ct->xid[0] = skb->mark;
268 +
269 +                       if (ct->xid[0] > 0)
270 +                               mark = ct->xid[0];
271 +               } else if (proto == 17) {	/* UDP */
272 +                       struct sock *sk;
273 +                       if (!skb->mark) {
274 +                               sk = __udp4_lib_lookup(net, src_ip, src_port,
275 +                                                      ip, port, dif, &udp_table);
276 +
277 +                               if (sk && par->hooknum == NF_INET_LOCAL_IN)
278 +                                       mark = sk->sk_nid;	/* use the nid of the socket found by the lookup */
279 +
280 +                               if (sk)
281 +                                       sock_put(sk);	/* drop the reference taken by the lookup */
282 +                       } else if (skb->mark > 0)
283 +                               /* The packet is marked, it's going out */
284 +                               ct->xid[0] = skb->mark;
285 +               } else if (proto == 6) {        /* TCP */
286 +                       int sockettype = 0;     /* Established socket */
287 +
288 +                       /* Looks for an established socket or a listening
289 +                          socket corresponding to the 4-tuple, in that order.
290 +                          The order is important for Codemux connections
291 +                          to be handled properly */
292 +
293 +                       connection_sk = inet_lookup_established(net,
294 +                                                               &tcp_hashinfo,
295 +                                                               src_ip,
296 +                                                               src_port, ip,
297 +                                                               port, dif);
298 +
299 +                       if (!connection_sk) {
300 +                               connection_sk = inet_lookup_listener(net,
301 +                                                                    &tcp_hashinfo,
302 +                                                                    ip, port,
303 +                                                                    dif);
304 +                               sockettype = 1; /* Listening socket */
305 +                       }
306 +
307 +                       if (connection_sk) {
308 +                               if (connection_sk->sk_state == TCP_TIME_WAIT) {
309 +                                       inet_twsk_put(inet_twsk(connection_sk));	/* time-wait entries are released here, not via sock_put() */
310 +                                       goto out_mark_finish;
311 +                               }
312 +
313 +                               /* The peercred is not set. We set it if the other side has an xid. */
314 +                               if (!PEERCRED_SET
315 +                                   (connection_sk->sk_peercred.uid)
316 +                                   && ct->xid[!dir] > 0 && (sockettype == 0)) {
317 +                                       connection_sk->sk_peercred.gid =
318 +                                           connection_sk->sk_peercred.uid =
319 +                                           ct->xid[!dir];
320 +                               }
321 +
322 +                               /* The peercred is set, and is not equal to the XID of 'the other side' */
323 +                               else if (PEERCRED_SET
324 +                                        (connection_sk->sk_peercred.uid)
325 +                                        && (connection_sk->sk_peercred.uid !=
326 +                                            ct->xid[!dir])
327 +                                        && (sockettype == 0)) {
328 +                                       mark = connection_sk->sk_peercred.uid;
329 +                               }
330 +
331 +                               /* Has this connection already been tagged? */
332 +                               if (ct->xid[dir] < 1) {
333 +                                       /* No - let's tag it */
334 +                                       ct->xid[dir] = connection_sk->sk_nid;
335 +                               }
336 +
337 +                               if (mark == -1 && (ct->xid[dir] != 0))
338 +                                       mark = ct->xid[dir];
339 +
340 +                               sock_put(connection_sk);
341 +                       }
342 +
343 +                       /* All else failed. Is this a connection over raw sockets?
344 +                          That explains why we couldn't get anything out of skb->sk,
345 +                          or look up a "real" connection. */
346 +                       if (ct->xid[dir] < 1) {
347 +                               if (skb->skb_tag)
348 +                                       ct->xid[dir] = skb->skb_tag;
349 +                       }
350 +
351 +                       /* Covers CoDemux case */
352 +                       if (mark < 1 && (ct->xid[dir] > 0))
353 +                               mark = ct->xid[dir];
354 +
355 +                       if (mark < 1 && (ct->xid[!dir] > 0))
356 +                               mark = ct->xid[!dir];
357 +                       goto out_mark_finish;
358 +               }
359 +       } else
360 +               mark = (skb->mark & ~info->mask) ^ info->mark;	/* the original MARK behaviour */
361 +
362 +out_mark_finish:
363 +       if (mark != -1)
364 +               skb->mark = mark;
365 +
366 +       curtag = &__get_cpu_var(sknid_elevator);
367 +       if (mark > 0 && *curtag == -2 && par->hooknum == NF_INET_LOCAL_IN)	/* only fills the per-CPU slot while it holds -2 */
368 +               *curtag = mark;
369  
370 -       skb->mark = (skb->mark & ~info->mask) ^ info->mark;
371         return XT_CONTINUE;
372  }
373  
374  static struct xt_target mark_tg_reg __read_mostly = {
375 -       .name           = "MARK",
376 -       .revision       = 2,
377 -       .family         = NFPROTO_UNSPEC,
378 -       .target         = mark_tg,
379 -       .targetsize     = sizeof(struct xt_mark_tginfo2),
380 -       .me             = THIS_MODULE,
381 +       .name = "MARK",	/* describes revision 2 of the MARK target; values unchanged, only the alignment differs */
382 +       .revision = 2,
383 +       .family = NFPROTO_UNSPEC,
384 +       .target = mark_tg,	/* per-packet handler */
385 +       .targetsize = sizeof(struct xt_mark_tginfo2),	/* size of the targinfo that mark_tg reads */
386 +       .me = THIS_MODULE,
387  };
388  
389  static int __init mark_tg_init(void)
390 diff --git a/net/netfilter/xt_SETXID.c b/net/netfilter/xt_SETXID.c
391 new file mode 100644
392 index 0000000..f8553c5
393 --- /dev/null
394 +++ b/net/netfilter/xt_SETXID.c
395 @@ -0,0 +1,77 @@
396 +#include <linux/module.h>
397 +#include <linux/skbuff.h>
398 +#include <linux/ip.h>
399 +#include <net/checksum.h>
400 +#include <linux/vs_network.h>
401 +
402 +#include <linux/netfilter/x_tables.h>
403 +#include <linux/netfilter/xt_SETXID.h>
404 +
405 +MODULE_LICENSE("GPL");
406 +MODULE_AUTHOR("");
407 +MODULE_DESCRIPTION("");
408 +MODULE_ALIAS("ipt_SETXID");
409 +
410 +/*
411 + * SETXID packet handler: in XT_SET_PACKET_XID mode, store the rule's mark in skb->skb_tag. Always returns XT_CONTINUE.
412 + */
413 +static unsigned int
414 +target_v2(struct sk_buff *skb,
415 +         const struct xt_target_param *par)
416 +{
417 +       const struct xt_setxid_target_info_v2 *setxidinfo = par->targinfo;
418 +
419 +       switch (setxidinfo->mode) {	/* no default case: any other mode leaves the packet untouched */
420 +       case XT_SET_PACKET_XID:
421 +               skb->skb_tag = setxidinfo->mark;
422 +               break;
423 +       }
424 +       return XT_CONTINUE;
425 +}
426 +
427 +/*
428 + * Validate a SETXID rule: only XT_SET_PACKET_XID is accepted; any other mode is logged and the rule is rejected (returns false).
429 + */
430 +static bool
431 +checkentry_v2(const struct xt_tgchk_param *par)
432 +{
433 +       const struct xt_setxid_target_info_v2 *setxidinfo = par->targinfo;	/* read-only here, hence const */
434 +
435 +       if (setxidinfo->mode != XT_SET_PACKET_XID) {
436 +               printk(KERN_WARNING "SETXID: unknown mode %u\n",
437 +                      setxidinfo->mode);
438 +               return false;
439 +       }
440 +
441 +       return true;
442 +}
443 +
444 +static struct xt_target xt_setxid_target[] = {	/* single-entry table describing the SETXID target */
445 +       {
446 +        .name = "SETXID",
447 +        .family = AF_INET,	/* IPv4 only */
448 +        .revision = 2,
449 +        .checkentry = checkentry_v2,	/* rule validation */
450 +        .target = target_v2,	/* per-packet handler */
451 +        .targetsize = sizeof(struct xt_setxid_target_info_v2),
452 +        .table = "mangle",	/* presumably restricts the target to the mangle table -- confirm against x_tables */
453 +        .me = THIS_MODULE,
454 +        }
455 +};
456 +
457 +static int __init init(void)
458 +{
459 +       /*
460 +        * Module load hook: register the SETXID target and hand the
461 +        * registration result straight back as the init status.
462 +        */
463 +       return xt_register_target(xt_setxid_target);
464 +}
465 +
466 +static void __exit fini(void)	/* module unload hook */
467 +{
468 +       xt_unregister_target(xt_setxid_target);	/* undo the registration done in init() */
469 +}
470 +
471 +module_init(init);
472 +module_exit(fini);
473 -- 
474 1.5.4.3
475