Bump release.
[linux-2.6.git] / linux-2.6-522-iptables-connection-tagging.patch
1 diff -Nurb linux-2.6.22-521/include/linux/netfilter/xt_MARK.h linux-2.6.22-522/include/linux/netfilter/xt_MARK.h
2 --- linux-2.6.22-521/include/linux/netfilter/xt_MARK.h  2007-07-08 19:32:17.000000000 -0400
3 +++ linux-2.6.22-522/include/linux/netfilter/xt_MARK.h  2008-09-17 17:59:53.000000000 -0400
4 @@ -11,6 +11,7 @@
5         XT_MARK_SET=0,
6         XT_MARK_AND,
7         XT_MARK_OR,
8 +       XT_MARK_COPYXID,
9  };
10  
11  struct xt_mark_target_info_v1 {
12 diff -Nurb linux-2.6.22-521/include/linux/netfilter/xt_SETXID.h linux-2.6.22-522/include/linux/netfilter/xt_SETXID.h
13 --- linux-2.6.22-521/include/linux/netfilter/xt_SETXID.h        1969-12-31 19:00:00.000000000 -0500
14 +++ linux-2.6.22-522/include/linux/netfilter/xt_SETXID.h        2008-09-17 17:59:53.000000000 -0400
15 @@ -0,0 +1,14 @@
16 +#ifndef _XT_SETXID_H_target
17 +#define _XT_SETXID_H_target
18 +
19 +/* Version 1: modes accepted in xt_setxid_target_info_v1.mode */
20 +enum {
21 +       XT_SET_PACKET_XID=0     /* the only mode defined so far */
22 +};
23 +
24 +struct xt_setxid_target_info_v1 {
25 +       unsigned long mark;     /* xid value; NOTE(review): size differs between 32/64-bit ABIs -- confirm compat */
26 +       u_int8_t mode;          /* XT_SET_PACKET_XID */
27 +};
28 +
29 +#endif /*_XT_SETXID_H_target*/
30 diff -Nurb linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_MARK.h linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_MARK.h
31 --- linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_MARK.h    2007-07-08 19:32:17.000000000 -0400
32 +++ linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_MARK.h    2008-09-17 17:59:53.000000000 -0400
33 @@ -12,6 +12,7 @@
34  #define IPT_MARK_SET   XT_MARK_SET
35  #define IPT_MARK_AND   XT_MARK_AND
36  #define        IPT_MARK_OR     XT_MARK_OR
37 +#define IPT_MARK_COPYXID       XT_MARK_COPYXID
38  
39  #define ipt_mark_target_info_v1 xt_mark_target_info_v1
40  
41 diff -Nurb linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_SETXID.h linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_SETXID.h
42 --- linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_SETXID.h  1969-12-31 19:00:00.000000000 -0500
43 +++ linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_SETXID.h  2008-09-17 17:59:53.000000000 -0400
44 @@ -0,0 +1,13 @@
45 +#ifndef _IPT_SETXID_H_target
46 +#define _IPT_SETXID_H_target
47 +
48 +/* Backwards compatibility for old userspace */
49 +
50 +#include <linux/netfilter/xt_SETXID.h>
51 +
52 +/* Version 1: the old ipt_ names are plain aliases of the xt_ definitions */
53 +#define IPT_SET_PACKET_XID     XT_SET_PACKET_XID
54 +
55 +#define ipt_setxid_target_info_v1 xt_setxid_target_info_v1
56 +
57 +#endif /*_IPT_SETXID_H_target*/
58 diff -Nurb linux-2.6.22-521/include/net/netfilter/nf_conntrack.h linux-2.6.22-522/include/net/netfilter/nf_conntrack.h
59 --- linux-2.6.22-521/include/net/netfilter/nf_conntrack.h       2007-07-08 19:32:17.000000000 -0400
60 +++ linux-2.6.22-522/include/net/netfilter/nf_conntrack.h       2008-09-17 17:59:53.000000000 -0400
61 @@ -131,6 +131,9 @@
62         /* Storage reserved for other modules: */
63         union nf_conntrack_proto proto;
64  
65 +       /* PLANETLAB. VNET-specific */
66 +       int xid[IP_CT_DIR_MAX];
67 +
68         /* features dynamically at the end: helper, nat (both optional) */
69         char data[0];
70  };
71 diff -Nurb linux-2.6.22-521/net/netfilter/Kconfig linux-2.6.22-522/net/netfilter/Kconfig
72 --- linux-2.6.22-521/net/netfilter/Kconfig      2007-07-08 19:32:17.000000000 -0400
73 +++ linux-2.6.22-522/net/netfilter/Kconfig      2008-09-17 17:59:53.000000000 -0400
74 @@ -389,6 +389,13 @@
75  
76           To compile it as a module, choose M here.  If unsure, say N.
77  
78 +config NETFILTER_XT_TARGET_SETXID
79 +       tristate '"SETXID" target support'
80 +       depends on NETFILTER_XTABLES
81 +       help
82 +         This option adds a `SETXID' target, which allows you to set the
83 +         xid tag (skb_tag) carried by a packet, from the mangle table.
84 +
85  config NETFILTER_XT_MATCH_COMMENT
86         tristate  '"comment" match support'
87         depends on NETFILTER_XTABLES
88 diff -Nurb linux-2.6.22-521/net/netfilter/Makefile linux-2.6.22-522/net/netfilter/Makefile
89 --- linux-2.6.22-521/net/netfilter/Makefile     2007-07-08 19:32:17.000000000 -0400
90 +++ linux-2.6.22-522/net/netfilter/Makefile     2008-09-17 17:59:53.000000000 -0400
91 @@ -37,6 +37,7 @@
92  obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
93  
94  # targets
95 +obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o
96  obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
97  obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
98  obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
99 diff -Nurb linux-2.6.22-521/net/netfilter/nf_conntrack_core.c linux-2.6.22-522/net/netfilter/nf_conntrack_core.c
100 --- linux-2.6.22-521/net/netfilter/nf_conntrack_core.c  2007-07-08 19:32:17.000000000 -0400
101 +++ linux-2.6.22-522/net/netfilter/nf_conntrack_core.c  2008-09-17 17:59:53.000000000 -0400
102 @@ -726,6 +726,8 @@
103  
104         /* Overload tuple linked list to put us in unconfirmed list. */
105         list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
106 +       conntrack->xid[IP_CT_DIR_ORIGINAL] = -1;
107 +       conntrack->xid[IP_CT_DIR_REPLY] = -1;
108  
109         write_unlock_bh(&nf_conntrack_lock);
110  
111 diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilter/xt_MARK.c
112 --- linux-2.6.22-521/net/netfilter/xt_MARK.c    2007-07-08 19:32:17.000000000 -0400
113 +++ linux-2.6.22-522/net/netfilter/xt_MARK.c    2008-09-17 18:29:52.000000000 -0400
114 @@ -5,13 +5,19 @@
115   * This program is free software; you can redistribute it and/or modify
116   * it under the terms of the GNU General Public License version 2 as
117   * published by the Free Software Foundation.
118 + *
119   */
120  
121  #include <linux/module.h>
122 +#include <linux/version.h>
123  #include <linux/skbuff.h>
124  #include <linux/ip.h>
125 +#include <net/udp.h>
126  #include <net/checksum.h>
127 +#include <net/route.h>
128 +#include <net/inet_hashtables.h>
129  
130 +#include <net/netfilter/nf_conntrack.h>
131  #include <linux/netfilter/x_tables.h>
132  #include <linux/netfilter/xt_MARK.h>
133  
134 @@ -21,6 +27,50 @@
135  MODULE_ALIAS("ipt_MARK");
136  MODULE_ALIAS("ip6t_MARK");
137  
138 +#define PEERCRED_SET(x) (((x) != 0) && ((x) != (unsigned int)-1)) /* true when x is neither 0 nor (unsigned int)-1 */
139 +/* Destination-side 16-bit "port" of a conntrack tuple, selected by tuple->dst.protonum. */
140 +static inline u_int16_t
141 +get_dst_port(struct nf_conntrack_tuple *tuple)
142 +{
143 +       switch (tuple->dst.protonum) {
144 +       case IPPROTO_GRE:
145 +               /* XXX GRE: key returned as-is on >= 2.6.11; older kernels truncate the 32-bit key to 16 bits */
146 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
147 +               return tuple->dst.u.gre.key;
148 +#else
149 +               return htons(ntohl(tuple->dst.u.gre.key));
150 +#endif  
151 +       case IPPROTO_ICMP:
152 +               /* Bind on ICMP echo ID; note it is read from the src half of the tuple */
153 +               return tuple->src.u.icmp.id;
154 +       case IPPROTO_TCP:
155 +               return tuple->dst.u.tcp.port;
156 +       case IPPROTO_UDP:
157 +               return tuple->dst.u.udp.port;
158 +       default:
159 +               return tuple->dst.u.all;        /* any other protocol: generic 'all' member */
160 +       }
161 +}
162 +/* Source-side counterpart of get_dst_port(): 16-bit "port" of a conntrack tuple, selected by protonum. */
163 +static inline u_int16_t
164 +get_src_port(struct nf_conntrack_tuple *tuple)
165 +{
166 +       switch (tuple->dst.protonum) {
167 +       case IPPROTO_GRE:
168 +               /* Key returned as-is, matching the >= 2.6.11 branch of get_dst_port(); this tree is 2.6.22 */
169 +               return tuple->src.u.gre.key;
170 +       case IPPROTO_ICMP:
171 +               /* ICMP has no ports: bind on the echo ID instead */
172 +               return tuple->src.u.icmp.id;
173 +       case IPPROTO_TCP:
174 +               return tuple->src.u.tcp.port;
175 +       case IPPROTO_UDP:
176 +               return tuple->src.u.udp.port;
177 +       default:
178 +               return tuple->src.u.all;        /* any other protocol: generic 'all' member */
179 +       }
180 +}
181 +
182  static unsigned int
183  target_v0(struct sk_buff **pskb,
184           const struct net_device *in,
185 @@ -35,6 +85,68 @@
186         return XT_CONTINUE;
187  }
188  
189 +DECLARE_PER_CPU(int, sknid_elevator); /* declaration only; the per-CPU variable is defined elsewhere */
190 +/* Best-match lookup of a UDP socket hashed on dport; returns it with a reference held, or NULL. */
191 +static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
192 +                      __be32 daddr, __be16 dport,
193 +                      int dif, struct hlist_head udptable[])
194 +{
195 +    struct sock *sk, *result = NULL;
196 +    struct hlist_node *node;
197 +    unsigned short hnum = ntohs(dport);
198 +    int badness = -1;    /* best score seen so far */
199 +
200 +    read_lock(&udp_hash_lock);
201 +    /* Score every candidate in the bucket: +1 for PF_INET, +2 for each bound/connected field that matches. */
202 +    sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
203 +        struct inet_sock *inet = inet_sk(sk);
204 +
205 +        if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
206 +            int score = (sk->sk_family == PF_INET ? 1 : 0);
207 +
208 +            if (inet->rcv_saddr) {
209 +                if (inet->rcv_saddr != daddr)
210 +                    continue;
211 +                score+=2;
212 +            } else {
213 +                /* no bound local address: skip unless daddr passes v4_addr_in_nx_info() (presumably the socket's network context -- confirm) */
214 +                if (!v4_addr_in_nx_info(sk->sk_nx_info,
215 +                    daddr, NXA_MASK_BIND))
216 +                    continue;
217 +            }
218 +            if (inet->daddr) {
219 +                if (inet->daddr != saddr)
220 +                    continue;
221 +                score+=2;
222 +            }
223 +            if (inet->dport) {
224 +                if (inet->dport != sport)
225 +                    continue;
226 +                score+=2;
227 +            }
228 +            if (sk->sk_bound_dev_if) {
229 +                if (sk->sk_bound_dev_if != dif)
230 +                    continue;
231 +                score+=2;
232 +            }
233 +            if (score == 9) {    /* 1 + 4*2: every field matched, stop searching */
234 +                result = sk;
235 +                break;
236 +            } else if (score > badness) {
237 +                result = sk;
238 +                badness = score;
239 +            }
240 +        }
241 +    }
242 +
243 +    if (result)
244 +        sock_hold(result);    /* reference taken while udp_hash_lock is still held */
245 +    read_unlock(&udp_hash_lock);
246 +    return result;
247 +}
248 +
249 +#define related(ct) ((ct) == (IP_CT_IS_REPLY + IP_CT_RELATED)) /* true when ct equals IP_CT_IS_REPLY + IP_CT_RELATED */
250 +
251  static unsigned int
252  target_v1(struct sk_buff **pskb,
253           const struct net_device *in,
254 @@ -44,7 +156,20 @@
255           const void *targinfo)
256  {
257         const struct xt_mark_target_info_v1 *markinfo = targinfo;
258 -       int mark = 0;
259 +               enum ip_conntrack_info ctinfo;
260 +               struct sock *connection_sk;
261 +               int dif;
262 +               struct nf_conn *ct;
263 +               extern struct inet_hashinfo tcp_hashinfo;
264 +               enum ip_conntrack_dir dir;
265 +               int *curtag;
266 +               u_int32_t src_ip;
267 +               u_int32_t dst_ip;
268 +               u_int16_t proto, src_port;
269 +               u_int32_t ip;
270 +               u_int16_t port;
271 +
272 +               int mark = -1;
273  
274         switch (markinfo->mode) {
275         case XT_MARK_SET:
276 @@ -58,13 +183,126 @@
277         case XT_MARK_OR:
278                 mark = (*pskb)->mark | markinfo->mark;
279                 break;
280 +
281 +                               case XT_MARK_COPYXID: 
282 +                                               dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
283 +
284 +                                               ct = nf_ct_get((*pskb), &ctinfo);
285 +                                               if (!ct) 
286 +                                                               break;
287 +
288 +                                               dir = CTINFO2DIR(ctinfo);
289 +                                               src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
290 +                                               dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
291 +                                               src_port = get_src_port(&ct->tuplehash[dir].tuple);
292 +                                               proto = ct->tuplehash[dir].tuple.dst.protonum;
293 +
294 +                                               ip = ct->tuplehash[dir].tuple.dst.u3.ip;
295 +                                               port = get_dst_port(&ct->tuplehash[dir].tuple);
296 +
297 +                                               if (proto == 1) {
298 +                                                               if ((*pskb)->mark>0) /* The packet is marked, it's going out */
299 +                                                               {
300 +                                                                               ct->xid[0]=(*pskb)->mark;
301 +                                                               }
302 +
303 +                                                               if (ct->xid[0] > 0) {
304 +                                                                               mark = ct->xid[0];
305 +                                                               }
306 +                                               }
307 +                                               else if (proto == 17) {
308 +                                                               struct sock *sk;
309 +                                                               if (!(*pskb)->mark) {
310 +                                                                               sk = __udp4_lib_lookup(src_ip, src_port, ip, port,
311 +                                                                                                               dif, udp_hash);
312 +
313 +                                                                               if (sk && hooknum==NF_IP_LOCAL_IN) {
314 +                                                                                               mark=sk->sk_nid;
315 +                                                                               }
316 +
317 +                                                                               if (sk) {
318 +                                                                                               sock_put(sk);
319 +                                                                               }
320 +                                                               }
321 +                                                               else
322 +                                                                               if ((*pskb)->mark>0) /* The packet is marked, it's going out */
323 +                                                                               {
324 +                                                                                               ct->xid[0]=(*pskb)->mark;
325 +                                                                               }
326 +                                               }
327 +                                               else if (proto == 6) /* TCP */{
328 +                                                               int sockettype=0; /* Established socket */
329 +                                                               /* Looks for an established socket or a listening socket corresponding to the 4-tuple, in
330 +                                                                * that order. The order is important for Codemux connections to be handled properly */
331 +
332 +                                                               connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
333 +
334 +                                                               if (!connection_sk) {
335 +                                                                               connection_sk = inet_lookup_listener(&tcp_hashinfo, ip, port, dif);
336 +                                                                               sockettype=1; /* Listening socket */
337 +                                                               }
338 +
339 +                                                               if (connection_sk) {
340 +                                                                               /* The peercred is not set. We set it if the other side has an xid. */
341 +                                                                               if (!PEERCRED_SET(connection_sk->sk_peercred.uid)
342 +                                                                                                               && ct->xid[!dir]>0 && (sockettype==0)) {
343 +                                                                                               connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[!dir];
344 +                                                                               }
345 +
346 +                                                                               /* The peercred is set, and is not equal to the XID of 'the other side' */
347 +                                                                               else if (PEERCRED_SET(connection_sk->sk_peercred.uid) && (connection_sk->sk_peercred.uid != ct->xid[!dir]) && (sockettype==0)) {
348 +                                                                                               mark = connection_sk->sk_peercred.uid;
349 +                                                                               }
350 +
351 +                                                                               /* Has this connection already been tagged? */
352 +                                                                               if (ct->xid[dir] < 1) {
353 +                                                                                               /* No - let's tag it */ 
354 +                                                                                               ct->xid[dir]=connection_sk->sk_nid;
355 +
356 +                                                                               }
357 +
358 +                                                                               if (mark==-1 && (ct->xid[dir]!= 0))
359 +                                                                                               mark = ct->xid[dir];
360 +
361 +                                                                               if (connection_sk->sk_state == TCP_TIME_WAIT) {
362 +                                                                                               inet_twsk_put(inet_twsk(connection_sk));
363 +                                                                                               break;
364 +                                                                               }
365 +                                                                               else
366 +                                                                                               sock_put(connection_sk);
367         }
368  
369 +                                                               /* All else failed. Is this a connection over raw sockets? That explains
370 +                                                                * why we couldn't get anything out of skb->sk, or look up a "real" connection.*/
371 +                                                               if (ct->xid[dir]<1) {
372 +                                                                               if ((*pskb)->skb_tag) {
373 +                                                                                               ct->xid[dir]=(*pskb)->skb_tag;
374 +                                                                               }
375 +                                                               }
376 +
377 +                                                               /* Covers CoDemux case */
378 +                                                               if (mark < 1 && (ct->xid[dir]>0)) {
379 +                                                                               mark = ct->xid[dir];
380 +                                                               }
381 +
382 +                                                               if (mark < 1 && (ct->xid[!dir]>0)) {
383 +                                                                               mark = ct->xid[!dir];
384 +                                                               }
385 +                                                               break;
386 +                                               }
387 +               }
388 +               if (mark != -1) {
389         (*pskb)->mark = mark;
390 +               }
391 +
392 +               curtag=&__get_cpu_var(sknid_elevator);
393 +               if (mark > 0 && *curtag==-2 && hooknum==NF_IP_LOCAL_IN) 
394 +               {
395 +                               *curtag = mark;
396 +               }
397         return XT_CONTINUE;
398  }
399  
400 -
401  static int
402  checkentry_v0(const char *tablename,
403               const void *entry,
404 @@ -92,7 +330,8 @@
405  
406         if (markinfo->mode != XT_MARK_SET
407             && markinfo->mode != XT_MARK_AND
408 -           && markinfo->mode != XT_MARK_OR) {
409 +           && markinfo->mode != XT_MARK_OR
410 +           && markinfo->mode != XT_MARK_COPYXID) {
411                 printk(KERN_WARNING "MARK: unknown mode %u\n",
412                        markinfo->mode);
413                 return 0;
414 diff -Nurb linux-2.6.22-521/net/netfilter/xt_SETXID.c linux-2.6.22-522/net/netfilter/xt_SETXID.c
415 --- linux-2.6.22-521/net/netfilter/xt_SETXID.c  1969-12-31 19:00:00.000000000 -0500
416 +++ linux-2.6.22-522/net/netfilter/xt_SETXID.c  2008-09-17 17:59:53.000000000 -0400
417 @@ -0,0 +1,79 @@
418 +#include <linux/module.h>
419 +#include <linux/skbuff.h>
420 +#include <linux/ip.h>
421 +#include <net/checksum.h>
422 +#include <linux/vs_network.h>
423 +
424 +#include <linux/netfilter/x_tables.h>
425 +#include <linux/netfilter/xt_SETXID.h>
426 +
427 +MODULE_LICENSE("GPL");
428 +MODULE_AUTHOR("");
429 +MODULE_DESCRIPTION("x_tables SETXID target: tag packets with an xid"); /* was empty; summarises what target_v1 does */
430 +MODULE_ALIAS("ipt_SETXID");
431 +
432 +static unsigned int
433 +target_v1(struct sk_buff **pskb,
434 +         const struct net_device *in,
435 +         const struct net_device *out,
436 +         unsigned int hooknum,
437 +         const struct xt_target *target,
438 +         const void *targinfo)
439 +{
440 +       const struct xt_setxid_target_info_v1 *info = targinfo;
441 +
442 +       /* SETXID v1: in XT_SET_PACKET_XID mode copy the rule's mark into
443 +        * the packet's skb_tag; any other mode leaves the packet alone. */
444 +       if (info->mode == XT_SET_PACKET_XID)
445 +               (*pskb)->skb_tag = info->mark;
446 +
447 +       return XT_CONTINUE;
448 +}
449 +
450 +
451 +static int
452 +checkentry_v1(const char *tablename,
453 +             const void *entry,
454 +             const struct xt_target *target,
455 +             void *targinfo,
456 +             unsigned int hook_mask)
457 +{
458 +       const struct xt_setxid_target_info_v1 *info = targinfo;
459 +
460 +       /* Rule validation: XT_SET_PACKET_XID is the only accepted mode. */
461 +       if (info->mode == XT_SET_PACKET_XID)
462 +               return 1;
463 +
464 +       printk(KERN_WARNING "SETXID: unknown mode %u\n",
465 +              info->mode);
466 +       return 0;
467 +}
468 +/* Targets registered by this module: a single entry, SETXID revision 1 for AF_INET, table "mangle". */
469 +static struct xt_target xt_setxid_target[] = {
470 +       {
471 +               .name           = "SETXID",
472 +               .family         = AF_INET,
473 +               .revision       = 1,
474 +               .checkentry     = checkentry_v1,        /* rejects unknown modes */
475 +               .target         = target_v1,            /* writes skb_tag */
476 +               .targetsize     = sizeof(struct xt_setxid_target_info_v1),
477 +               .table          = "mangle",
478 +               .me             = THIS_MODULE,
479 +       }
480 +};
481 +
482 +static int __init init(void)
483 +{
484 +       /* Register all entries of xt_setxid_target[]; the registration
485 +        * result is returned unchanged as this function's status. */
486 +       return xt_register_targets(xt_setxid_target,
487 +                                  ARRAY_SIZE(xt_setxid_target));
488 +}
489 +/* Module exit: unregister the same target array that init() registered. */
490 +static void __exit fini(void)
491 +{
492 +       xt_unregister_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target));
493 +}
494 +
495 +module_init(init);
496 +module_exit(fini);