An addendum to the change involving UDP-listing/RAW sockets. Not including this fix...
[linux-2.6.git] / linux-2.6-522-iptables-connection-tagging.patch
1 diff -Nurb linux-2.6.22-521/include/linux/netfilter/xt_MARK.h linux-2.6.22-522/include/linux/netfilter/xt_MARK.h
2 --- linux-2.6.22-521/include/linux/netfilter/xt_MARK.h  2007-07-08 19:32:17.000000000 -0400
3 +++ linux-2.6.22-522/include/linux/netfilter/xt_MARK.h  2008-07-28 16:36:24.000000000 -0400
4 @@ -11,6 +11,7 @@
5         XT_MARK_SET=0,
6         XT_MARK_AND,
7         XT_MARK_OR,
8 +       XT_MARK_COPYXID,
9  };
10  
11  struct xt_mark_target_info_v1 {
12 diff -Nurb linux-2.6.22-521/include/linux/netfilter/xt_SETXID.h linux-2.6.22-522/include/linux/netfilter/xt_SETXID.h
13 --- linux-2.6.22-521/include/linux/netfilter/xt_SETXID.h        1969-12-31 19:00:00.000000000 -0500
14 +++ linux-2.6.22-522/include/linux/netfilter/xt_SETXID.h        2008-07-28 16:36:24.000000000 -0400
15 @@ -0,0 +1,14 @@
16 +#ifndef _XT_SETXID_H_target
17 +#define _XT_SETXID_H_target
18 +
19 +/* Version 1: modes accepted by the SETXID target */
20 +enum {
21 +       XT_SET_PACKET_XID=0     /* store the rule's mark in the packet's skb_tag */
22 +};
23 +
24 +struct xt_setxid_target_info_v1 {
25 +       unsigned long mark;     /* value the target writes to skb->skb_tag */
26 +       u_int8_t mode;          /* must be XT_SET_PACKET_XID; checkentry rejects other values */
27 +};
28 +
29 +#endif /*_XT_SETXID_H_target*/
30 diff -Nurb linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_MARK.h linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_MARK.h
31 --- linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_MARK.h    2007-07-08 19:32:17.000000000 -0400
32 +++ linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_MARK.h    2008-07-28 16:36:24.000000000 -0400
33 @@ -12,6 +12,7 @@
34  #define IPT_MARK_SET   XT_MARK_SET
35  #define IPT_MARK_AND   XT_MARK_AND
36  #define        IPT_MARK_OR     XT_MARK_OR
37 +#define IPT_MARK_COPYXID       XT_MARK_COPYXID
38  
39  #define ipt_mark_target_info_v1 xt_mark_target_info_v1
40  
41 diff -Nurb linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_SETXID.h linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_SETXID.h
42 --- linux-2.6.22-521/include/linux/netfilter_ipv4/ipt_SETXID.h  1969-12-31 19:00:00.000000000 -0500
43 +++ linux-2.6.22-522/include/linux/netfilter_ipv4/ipt_SETXID.h  2008-07-28 16:36:24.000000000 -0400
44 @@ -0,0 +1,13 @@
45 +#ifndef _IPT_SETXID_H_target
46 +#define _IPT_SETXID_H_target
47 +
48 +/* Backwards compatibility for old userspace */
49 +
50 +#include <linux/netfilter/xt_SETXID.h>
51 +
52 +/* Version 1 */
53 +#define IPT_SET_PACKET_XID     XT_SET_PACKET_XID
54 +
55 +#define ipt_setxid_target_info_v1 xt_setxid_target_info_v1
56 +
57 +#endif /*_IPT_SETXID_H_target*/
58 diff -Nurb linux-2.6.22-521/include/net/netfilter/nf_conntrack.h linux-2.6.22-522/include/net/netfilter/nf_conntrack.h
59 --- linux-2.6.22-521/include/net/netfilter/nf_conntrack.h       2007-07-08 19:32:17.000000000 -0400
60 +++ linux-2.6.22-522/include/net/netfilter/nf_conntrack.h       2008-07-28 16:36:24.000000000 -0400
61 @@ -131,6 +131,9 @@
62         /* Storage reserved for other modules: */
63         union nf_conntrack_proto proto;
64  
65 +       /* PLANETLAB. VNET-specific */
66 +       int xid[IP_CT_DIR_MAX];
67 +
68         /* features dynamically at the end: helper, nat (both optional) */
69         char data[0];
70  };
71 diff -Nurb linux-2.6.22-521/net/netfilter/Kconfig linux-2.6.22-522/net/netfilter/Kconfig
72 --- linux-2.6.22-521/net/netfilter/Kconfig      2007-07-08 19:32:17.000000000 -0400
73 +++ linux-2.6.22-522/net/netfilter/Kconfig      2008-07-28 16:36:24.000000000 -0400
74 @@ -389,6 +389,13 @@
75  
76           To compile it as a module, choose M here.  If unsure, say N.
77  
78 +config NETFILTER_XT_TARGET_SETXID
79 +       tristate '"SETXID" target support'
80 +       depends on NETFILTER_XTABLES
81 +       help
82 +         This option adds a `SETXID' target, which allows you to set the
83 +         xid tag (skb_tag) carried by a packet.
84 +
85  config NETFILTER_XT_MATCH_COMMENT
86         tristate  '"comment" match support'
87         depends on NETFILTER_XTABLES
88 diff -Nurb linux-2.6.22-521/net/netfilter/Makefile linux-2.6.22-522/net/netfilter/Makefile
89 --- linux-2.6.22-521/net/netfilter/Makefile     2007-07-08 19:32:17.000000000 -0400
90 +++ linux-2.6.22-522/net/netfilter/Makefile     2008-07-28 16:36:24.000000000 -0400
91 @@ -37,6 +37,7 @@
92  obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
93  
94  # targets
95 +obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o
96  obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
97  obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
98  obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
99 diff -Nurb linux-2.6.22-521/net/netfilter/nf_conntrack_core.c linux-2.6.22-522/net/netfilter/nf_conntrack_core.c
100 --- linux-2.6.22-521/net/netfilter/nf_conntrack_core.c  2007-07-08 19:32:17.000000000 -0400
101 +++ linux-2.6.22-522/net/netfilter/nf_conntrack_core.c  2008-07-28 16:36:24.000000000 -0400
102 @@ -726,6 +726,8 @@
103  
104         /* Overload tuple linked list to put us in unconfirmed list. */
105         list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
106 +       conntrack->xid[IP_CT_DIR_ORIGINAL] = -1;
107 +       conntrack->xid[IP_CT_DIR_REPLY] = -1;
108  
109         write_unlock_bh(&nf_conntrack_lock);
110  
111 diff -Nurb linux-2.6.22-521/net/netfilter/xt_MARK.c linux-2.6.22-522/net/netfilter/xt_MARK.c
112 --- linux-2.6.22-521/net/netfilter/xt_MARK.c    2007-07-08 19:32:17.000000000 -0400
113 +++ linux-2.6.22-522/net/netfilter/xt_MARK.c    2008-08-04 16:44:16.000000000 -0400
114 @@ -5,13 +5,19 @@
115   * This program is free software; you can redistribute it and/or modify
116   * it under the terms of the GNU General Public License version 2 as
117   * published by the Free Software Foundation.
118 + *
119   */
120  
121  #include <linux/module.h>
122 +#include <linux/version.h>
123  #include <linux/skbuff.h>
124  #include <linux/ip.h>
125 +#include <net/udp.h>
126  #include <net/checksum.h>
127 +#include <net/route.h>
128 +#include <net/inet_hashtables.h>
129  
130 +#include <net/netfilter/nf_conntrack.h>
131  #include <linux/netfilter/x_tables.h>
132  #include <linux/netfilter/xt_MARK.h>
133  
134 @@ -21,6 +27,48 @@
135  MODULE_ALIAS("ipt_MARK");
136  MODULE_ALIAS("ip6t_MARK");
137  
138 +static inline u_int16_t
139 +get_dst_port(struct nf_conntrack_tuple *tuple)
140 +{      /* Return the 16-bit destination "port" of the tuple, chosen by tuple->dst.protonum. */
141 +       switch (tuple->dst.protonum) {
142 +       case IPPROTO_GRE:
143 +               /* XXX Truncate 32-bit GRE key to 16 bits (conversion is compiled only for kernels < 2.6.11) */
144 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
145 +               return tuple->dst.u.gre.key;
146 +#else
147 +               return htons(ntohl(tuple->dst.u.gre.key));
148 +#endif  
149 +       case IPPROTO_ICMP:
150 +               /* Bind on ICMP echo ID; note it is read from the src half of the tuple */
151 +               return tuple->src.u.icmp.id;
152 +       case IPPROTO_TCP:
153 +               return tuple->dst.u.tcp.port;
154 +       case IPPROTO_UDP:
155 +               return tuple->dst.u.udp.port;
156 +       default:
157 +               return tuple->dst.u.all;        /* any other protocol: generic 'all' member */
158 +       }
159 +}
160 +
161 +static inline u_int16_t
162 +get_src_port(struct nf_conntrack_tuple *tuple)
163 +{      /* Return the 16-bit source "port" of the tuple, chosen by tuple->dst.protonum. */
164 +       switch (tuple->dst.protonum) {
165 +       case IPPROTO_GRE:
166 +               /* Key is used as-is, matching get_dst_port() on kernels >= 2.6.11 (this patch targets 2.6.22) */
167 +               return tuple->src.u.gre.key;
168 +       case IPPROTO_ICMP:
169 +               /* Bind on ICMP echo ID */
170 +               return tuple->src.u.icmp.id;
171 +       case IPPROTO_TCP:
172 +               return tuple->src.u.tcp.port;
173 +       case IPPROTO_UDP:
174 +               return tuple->src.u.udp.port;
175 +       default:
176 +               return tuple->src.u.all;        /* any other protocol: generic 'all' member */
177 +       }
178 +}
179 +
180  static unsigned int
181  target_v0(struct sk_buff **pskb,
182           const struct net_device *in,
183 @@ -35,6 +83,67 @@
184         return XT_CONTINUE;
185  }
186  
187 +DECLARE_PER_CPU(int, sknid_elevator);  /* per-CPU variable defined outside this file; declared, not defined, here */
188 +static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
189 +                      __be32 daddr, __be16 dport,
190 +                      int dif, struct hlist_head udptable[])
191 +{   /* Pick the best-scoring UDP socket in udptable for the given addresses/ports; returns it with a reference held, or NULL. */
192 +    struct sock *sk, *result = NULL;
193 +    struct hlist_node *node;
194 +    unsigned short hnum = ntohs(dport);    /* host-order destination port; also selects the hash bucket */
195 +    int badness = -1;                      /* highest score seen so far */
196 +
197 +    read_lock(&udp_hash_lock);
198 +
199 +    sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
200 +        struct inet_sock *inet = inet_sk(sk);
201 +
202 +        if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
203 +            int score = (sk->sk_family == PF_INET ? 1 : 0);
204 +
205 +            if (inet->rcv_saddr) {         /* socket has a local address: it must equal daddr */
206 +                if (inet->rcv_saddr != daddr)
207 +                    continue;
208 +                score+=2;
209 +            } else {
210 +                /* no local address set: skip the socket unless v4_addr_in_nx_info() accepts daddr for its nx_info */
211 +                if (!v4_addr_in_nx_info(sk->sk_nx_info,
212 +                    daddr, NXA_MASK_BIND))
213 +                    continue;
214 +            }
215 +            if (inet->daddr) {             /* socket has a peer address: it must equal saddr */
216 +                if (inet->daddr != saddr)
217 +                    continue;
218 +                score+=2;
219 +            }
220 +            if (inet->dport) {             /* socket has a peer port: it must equal sport */
221 +                if (inet->dport != sport)
222 +                    continue;
223 +                score+=2;
224 +            }
225 +            if (sk->sk_bound_dev_if) {     /* socket is bound to a device: it must equal dif */
226 +                if (sk->sk_bound_dev_if != dif)
227 +                    continue;
228 +                score+=2;
229 +            }
230 +            if (score == 9) {              /* 1 (PF_INET) + 2 for each of the four checks: maximum, stop searching */
231 +                result = sk;
232 +                break;
233 +            } else if (score > badness) {  /* otherwise remember the highest-scoring candidate */
234 +                result = sk;
235 +                badness = score;
236 +            }
237 +        }
238 +    }
239 +
240 +    if (result)
241 +        sock_hold(result);                 /* reference taken before the lock is dropped; the caller in this file releases it with sock_put() */
242 +    read_unlock(&udp_hash_lock);
243 +    return result;
244 +}
245 +
246 +#define related(ct) (ct==(IP_CT_IS_REPLY + IP_CT_RELATED))
247 +
248  static unsigned int
249  target_v1(struct sk_buff **pskb,
250           const struct net_device *in,
251 @@ -44,7 +153,20 @@
252           const void *targinfo)
253  {
254         const struct xt_mark_target_info_v1 *markinfo = targinfo;
255 -       int mark = 0;
256 +       enum ip_conntrack_info ctinfo;
257 +      struct sock *connection_sk;
258 +      int dif;
259 +      struct nf_conn *ct;
260 +      extern struct inet_hashinfo tcp_hashinfo;
261 +      enum ip_conntrack_dir dir;
262 +      int *curtag;
263 +      u_int32_t src_ip;
264 +      u_int32_t dst_ip;
265 +      u_int16_t proto, src_port;
266 +      u_int32_t ip;
267 +      u_int16_t port;
268 +
269 +       int mark = -1;
270  
271         switch (markinfo->mode) {
272         case XT_MARK_SET:
273 @@ -58,13 +180,121 @@
274         case XT_MARK_OR:
275                 mark = (*pskb)->mark | markinfo->mark;
276                 break;
277 +
278 +       case XT_MARK_COPYXID: 
279 +             dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
280 +
281 +             ct = nf_ct_get((*pskb), &ctinfo);
282 +             if (!ct) 
283 +                     break;
284 +
285 +             dir = CTINFO2DIR(ctinfo);
286 +             src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
287 +             dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
288 +             src_port = get_src_port(&ct->tuplehash[dir].tuple);
289 +             proto = ct->tuplehash[dir].tuple.dst.protonum;
290 +
291 +             ip = ct->tuplehash[dir].tuple.dst.u3.ip;
292 +             port = get_dst_port(&ct->tuplehash[dir].tuple);
293 +
294 +             if (proto == 1) {
295 +                                 if ((*pskb)->mark>0) /* The packet is marked, it's going out */
296 +                                 {
297 +                                                 ct->xid[0]=(*pskb)->mark;
298 +                                 }
299 +
300 +                                 if (ct->xid[0] > 0) {
301 +                                                 mark = ct->xid[0];
302 +                                 }
303 +             }
304 +                 else if (proto == 17) {
305 +                  struct sock *sk;
306 +                  if (!(*pskb)->mark) {
307 +                          sk = __udp4_lib_lookup(src_ip, src_port, ip, port,
308 +                                          dif, udp_hash);
309 +
310 +                          if (sk && hooknum==NF_IP_LOCAL_IN) {
311 +                                  mark=sk->sk_nid;
312 +                          }
313 +
314 +                                                 if (sk) {
315 +                                                                 sock_put(sk);
316 +                                                 }
317 +                  }
318 +                  else
319 +                  if ((*pskb)->mark>0) /* The packet is marked, it's going out */
320 +                  {
321 +                          ct->xid[0]=(*pskb)->mark;
322 +                  }
323 +          }
324 +             else if (proto == 6) { 
325 +                     if ((*pskb)->sk) {
326 +                             /* It's a listening socket */
327 +                             connection_sk = (*pskb)->sk;
328 +                             sock_hold(connection_sk);
329 +                     }
330 +                     else   /* It might be a connected socket */
331 +                             connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
332 +
333 +
334 +                     if (connection_sk /* Well, some kind of TCP socket */) {
335 +                             if (connection_sk->sk_peercred.uid == 0 || connection_sk->sk_peercred.uid == (__u32) -1) {
336 +                                     /* Normal case - the peercred on the socket is not set */ 
337 +                                     connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[!dir];
338 +                             }
339 +                             else    /* Exceptional case - the peercred was set using SET_PEERCRED. Somebody wants us
340 +                                        to mark packets with some arbitrary value.*/
341 +                                     mark=connection_sk->sk_peercred.uid;
342 +
343 +                             /* Has this connection already been tagged? */
344 +                             if (ct->xid[dir] < 1) {
345 +                                     /* No - let's tag it */ 
346 +                                     ct->xid[dir]=connection_sk->sk_nid;
347 +                             }
348 +
349 +                             if (mark==-1 && (connection_sk->sk_nid != 0))
350 +                                     mark = ct->xid[dir];
351 +
352 +
353 +                             if (connection_sk->sk_state == TCP_TIME_WAIT) {
354 +                                     inet_twsk_put(inet_twsk(connection_sk));
355 +                                     break;
356 +                             }
357 +                             else
358 +                                     sock_put(connection_sk);
359 +                     }
360 +
361 +                     /* Covers CoDemux case */
362 +                     if (mark < 1 && (ct->xid[dir]>0)) {
363 +                             mark = ct->xid[dir];
364         }
365  
366 +                     if (mark < 1 && (ct->xid[!dir]>0)) {
367 +                               mark = ct->xid[!dir];
368 +                     }
369 +
370 +                     /* All else failed. Is this a connection over raw sockets? That explains
371 +                      * why we couldn't get anything out of skb->sk, or look up a "real" connection.*/
372 +                     if (ct->xid[dir]<1) {
373 +                       if ((*pskb)->skb_tag) {
374 +                               ct->xid[dir]=(*pskb)->skb_tag;
375 +                       }
376 +                     }
377 +             }
378 +             break;
379 +       }
380 +       if (mark != -1) {
381         (*pskb)->mark = mark;
382 +       }
383 +
384 +       curtag=&__get_cpu_var(sknid_elevator);
385 +       if (mark > 0 && *curtag==-2 && hooknum==NF_IP_LOCAL_IN) 
386 +       {
387 +               *curtag = mark;
388 +       }
389         return XT_CONTINUE;
390  }
391  
392 -
393  static int
394  checkentry_v0(const char *tablename,
395               const void *entry,
396 @@ -92,7 +322,8 @@
397  
398         if (markinfo->mode != XT_MARK_SET
399             && markinfo->mode != XT_MARK_AND
400 -           && markinfo->mode != XT_MARK_OR) {
401 +           && markinfo->mode != XT_MARK_OR
402 +           && markinfo->mode != XT_MARK_COPYXID) {
403                 printk(KERN_WARNING "MARK: unknown mode %u\n",
404                        markinfo->mode);
405                 return 0;
406 diff -Nurb linux-2.6.22-521/net/netfilter/xt_SETXID.c linux-2.6.22-522/net/netfilter/xt_SETXID.c
407 --- linux-2.6.22-521/net/netfilter/xt_SETXID.c  1969-12-31 19:00:00.000000000 -0500
408 +++ linux-2.6.22-522/net/netfilter/xt_SETXID.c  2008-07-28 16:36:24.000000000 -0400
409 @@ -0,0 +1,79 @@
410 +#include <linux/module.h>
411 +#include <linux/skbuff.h>
412 +#include <linux/ip.h>
413 +#include <net/checksum.h>
414 +#include <linux/vs_network.h>
415 +
416 +#include <linux/netfilter/x_tables.h>
417 +#include <linux/netfilter/xt_SETXID.h>
418 +
419 +MODULE_LICENSE("GPL");
420 +MODULE_AUTHOR("");
421 +MODULE_DESCRIPTION("");
422 +MODULE_ALIAS("ipt_SETXID");
423 +
424 +static unsigned int
425 +target_v1(struct sk_buff **pskb,
426 +         const struct net_device *in,
427 +         const struct net_device *out,
428 +         unsigned int hooknum,
429 +         const struct xt_target *target,
430 +         const void *targinfo)
431 +{      /* SETXID target body: writes the rule's mark into the packet's skb_tag, then continues rule traversal. */
432 +       const struct xt_setxid_target_info_v1 *setxidinfo = targinfo;
433 +
434 +       switch (setxidinfo->mode) {
435 +       case XT_SET_PACKET_XID:
436 +                (*pskb)->skb_tag = setxidinfo->mark;   /* tag the packet with the configured value */
437 +               break;
438 +       }
439 +       return XT_CONTINUE;     /* any other mode leaves the packet untouched */
440 +}
441 +
442 +
443 +static int
444 +checkentry_v1(const char *tablename,
445 +             const void *entry,
446 +             const struct xt_target *target,
447 +             void *targinfo,
448 +             unsigned int hook_mask)
449 +{      /* Rule validation: returns 1 when the mode is XT_SET_PACKET_XID, otherwise logs a warning and returns 0. */
450 +       struct xt_setxid_target_info_v1 *setxidinfo = targinfo;
451 +
452 +       if (setxidinfo->mode != XT_SET_PACKET_XID) {    /* the only mode this target implements */
453 +               printk(KERN_WARNING "SETXID: unknown mode %u\n",
454 +                      setxidinfo->mode);
455 +               return 0;
456 +       }
457 +
458 +       return 1;
459 +}
460 +
461 +static struct xt_target xt_setxid_target[] = {  /* targets registered by this module (a single entry) */
462 +       {
463 +               .name           = "SETXID",
464 +               .family         = AF_INET,
465 +               .revision       = 1,
466 +               .checkentry     = checkentry_v1,
467 +               .target         = target_v1,
468 +               .targetsize     = sizeof(struct xt_setxid_target_info_v1),
469 +               .table          = "mangle",
470 +               .me             = THIS_MODULE,
471 +       }
472 +};
473 +
474 +static int __init init(void)
475 +{      /* Module entry point: registers every entry of xt_setxid_target and returns the registration result. */
476 +       int err;
477 +
478 +       err = xt_register_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target));
479 +       return err;
480 +}
481 +
482 +static void __exit fini(void)
483 +{      /* Module exit point: unregisters the targets that init() registered. */
484 +       xt_unregister_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target));
485 +}
486 +
487 +module_init(init);
488 +module_exit(fini);