adding the patch file too
[linux-2.6.git] / linux-2.6-520-vnet+.patch
1 diff -Nurb linux-2.6.22-510/include/linux/netfilter/xt_MARK.h linux-2.6.22-520/include/linux/netfilter/xt_MARK.h
2 --- linux-2.6.22-510/include/linux/netfilter/xt_MARK.h  2007-07-08 19:32:17.000000000 -0400
3 +++ linux-2.6.22-520/include/linux/netfilter/xt_MARK.h  2008-06-06 17:07:56.000000000 -0400
4 @@ -11,6 +11,7 @@
5         XT_MARK_SET=0,
6         XT_MARK_AND,
7         XT_MARK_OR,
8 +       XT_MARK_COPYXID,
9  };
10  
11  struct xt_mark_target_info_v1 {
12 diff -Nurb linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h
13 --- linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h        1969-12-31 19:00:00.000000000 -0500
14 +++ linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h        2008-06-06 17:07:56.000000000 -0400
15 @@ -0,0 +1,14 @@
16 +#ifndef _XT_SETXID_H_target
17 +#define _XT_SETXID_H_target
18 +
19 +/* Version 1: values accepted in xt_setxid_target_info_v1.mode */
20 +enum {
21 +       XT_SET_PACKET_XID=0
22 +};
23 +/* NOTE(review): unsigned long is ABI-dependent in size - confirm 32/64-bit userspace compat */
24 +struct xt_setxid_target_info_v1 {
25 +       unsigned long mark;     /* value the SETXID target writes to skb_tag */
26 +       u_int8_t mode;          /* XT_SET_PACKET_XID */
27 +};
28 +
29 +#endif /*_XT_SETXID_H_target*/
30 diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h
31 --- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h    2007-07-08 19:32:17.000000000 -0400
32 +++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h    2008-06-06 17:07:56.000000000 -0400
33 @@ -12,6 +12,7 @@
34  #define IPT_MARK_SET   XT_MARK_SET
35  #define IPT_MARK_AND   XT_MARK_AND
36  #define        IPT_MARK_OR     XT_MARK_OR
37 +#define IPT_MARK_COPYXID       XT_MARK_COPYXID
38  
39  #define ipt_mark_target_info_v1 xt_mark_target_info_v1
40  
41 diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h
42 --- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h  1969-12-31 19:00:00.000000000 -0500
43 +++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h  2008-06-06 17:07:56.000000000 -0400
44 @@ -0,0 +1,13 @@
45 +#ifndef _IPT_SETXID_H_target
46 +#define _IPT_SETXID_H_target
47 +
48 +/* Backwards compatibility for old userspace */
49 +
50 +#include <linux/netfilter/xt_SETXID.h>
51 +
52 +/* Version 1 */
53 +#define IPT_SET_PACKET_XID     XT_SET_PACKET_XID
54 +
55 +#define ipt_setxid_target_info_v1 xt_setxid_target_info_v1
56 +
57 +#endif /*_IPT_SETXID_H_target*/
58 diff -Nurb linux-2.6.22-510/include/linux/skbuff.h linux-2.6.22-520/include/linux/skbuff.h
59 --- linux-2.6.22-510/include/linux/skbuff.h     2007-07-08 19:32:17.000000000 -0400
60 +++ linux-2.6.22-520/include/linux/skbuff.h     2008-06-06 17:07:56.000000000 -0400
61 @@ -302,6 +302,7 @@
62  #endif
63  
64         __u32                   mark;
65 +#define skb_tag                        mark
66  
67         sk_buff_data_t          transport_header;
68         sk_buff_data_t          network_header;
69 diff -Nurb linux-2.6.22-510/include/linux/socket.h linux-2.6.22-520/include/linux/socket.h
70 --- linux-2.6.22-510/include/linux/socket.h     2007-07-08 19:32:17.000000000 -0400
71 +++ linux-2.6.22-520/include/linux/socket.h     2008-06-06 17:07:56.000000000 -0400
72 @@ -288,6 +288,8 @@
73  #define SOL_TIPC       271
74  #define SOL_RXRPC      272
75  
76 +#define SO_SETXID      SO_PEERCRED
77 +
78  /* IPX options */
79  #define IPX_TYPE       1
80  
81 diff -Nurb linux-2.6.22-510/include/linux/vserver/network.h linux-2.6.22-520/include/linux/vserver/network.h
82 --- linux-2.6.22-510/include/linux/vserver/network.h    2008-06-06 17:07:48.000000000 -0400
83 +++ linux-2.6.22-520/include/linux/vserver/network.h    2008-06-06 17:07:56.000000000 -0400
84 @@ -47,6 +47,8 @@
85  #define NXC_TUN_CREATE         0x00000001
86  
87  #define NXC_RAW_ICMP           0x00000100
88 +#define NXC_RAW_SOCKET         0x00000200
89 +#define NXC_RAW_SEND           0x00000400
90  
91  
92  /* address types */
93 diff -Nurb linux-2.6.22-510/include/net/netfilter/nf_conntrack.h linux-2.6.22-520/include/net/netfilter/nf_conntrack.h
94 --- linux-2.6.22-510/include/net/netfilter/nf_conntrack.h       2007-07-08 19:32:17.000000000 -0400
95 +++ linux-2.6.22-520/include/net/netfilter/nf_conntrack.h       2008-06-06 17:07:56.000000000 -0400
96 @@ -131,6 +131,9 @@
97         /* Storage reserved for other modules: */
98         union nf_conntrack_proto proto;
99  
100 +       /* PLANETLAB. VNET-specific */
101 +       int xid[IP_CT_DIR_MAX];
102 +
103         /* features dynamically at the end: helper, nat (both optional) */
104         char data[0];
105  };
106 diff -Nurb linux-2.6.22-510/include/net/raw.h linux-2.6.22-520/include/net/raw.h
107 --- linux-2.6.22-510/include/net/raw.h  2007-07-08 19:32:17.000000000 -0400
108 +++ linux-2.6.22-520/include/net/raw.h  2008-06-06 17:07:56.000000000 -0400
109 @@ -36,7 +36,7 @@
110  
111  extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
112                                     __be32 raddr, __be32 laddr,
113 -                                   int dif);
114 +                                   int dif, int tag);
115  
116  extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
117  
118 diff -Nurb linux-2.6.22-510/net/core/dev.c linux-2.6.22-520/net/core/dev.c
119 --- linux-2.6.22-510/net/core/dev.c     2008-06-06 17:07:48.000000000 -0400
120 +++ linux-2.6.22-520/net/core/dev.c     2008-06-06 17:07:56.000000000 -0400
121 @@ -1803,6 +1803,7 @@
122   * the ingress scheduler, you just cant add policies on ingress.
123   *
124   */
125 +
126  static int ing_filter(struct sk_buff *skb)
127  {
128         struct Qdisc *q;
129 @@ -1832,13 +1833,20 @@
130  }
131  #endif
132  
133 +/* The code already makes the assumption that packet handlers run
134 + * sequentially on the same CPU. -Sapan */
135 +DEFINE_PER_CPU(int, sknid_elevator);
136 +
137  int netif_receive_skb(struct sk_buff *skb)
138  {
139         struct packet_type *ptype, *pt_prev;
140         struct net_device *orig_dev;
141         int ret = NET_RX_DROP;
142 +       int *cur_elevator=&__get_cpu_var(sknid_elevator);
143         __be16 type;
144  
145 +       *cur_elevator = 0;
146 +
147         /* if we've gotten here through NAPI, check netpoll */
148         if (skb->dev->poll && netpoll_rx(skb))
149                 return NET_RX_DROP;
150 @@ -1873,8 +1881,9 @@
151  
152         list_for_each_entry_rcu(ptype, &ptype_all, list) {
153                 if (!ptype->dev || ptype->dev == skb->dev) {
154 -                       if (pt_prev)
155 +                       if (pt_prev) {
156                                 ret = deliver_skb(skb, pt_prev, orig_dev);
157 +                       }
158                         pt_prev = ptype;
159                 }
160         }
161 @@ -1912,8 +1921,22 @@
162                 }
163         }
164  
165 +       /* We don't want the packet handlers to throw the packet away
166 +        * if we want the taps to treat it again - Sapan */
167 +       if (*cur_elevator) {
168 +               atomic_inc(&skb->users);
169 +       }
170 +
171         if (pt_prev) {
172                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
173 +                       if (*cur_elevator > 0) {
174 +                               skb->skb_tag = *cur_elevator;
175 +                               list_for_each_entry_rcu(ptype, &ptype_all, list) {
176 +                                       if (!ptype->dev || ptype->dev == skb->dev) {
177 +                                                       ret = deliver_skb(skb, ptype, orig_dev);
178 +                                       }
179 +                               }
180 +                       }
181         } else {
182                 kfree_skb(skb);
183                 /* Jamal, now you will not able to escape explaining
184 @@ -1922,6 +1945,13 @@
185                 ret = NET_RX_DROP;
186         }
187  
188 +       if (*cur_elevator) {
189 +               /* We have a packet */
190 +               kfree_skb(skb);
191 +       }
192 +
193 +       *cur_elevator=0;
194 +
195  out:
196         rcu_read_unlock();
197         return ret;
198 @@ -3780,6 +3810,7 @@
199  EXPORT_SYMBOL(net_enable_timestamp);
200  EXPORT_SYMBOL(net_disable_timestamp);
201  EXPORT_SYMBOL(dev_get_flags);
202 +EXPORT_PER_CPU_SYMBOL(sknid_elevator);
203  
204  #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
205  EXPORT_SYMBOL(br_handle_frame_hook);
206 diff -Nurb linux-2.6.22-510/net/core/skbuff.c linux-2.6.22-520/net/core/skbuff.c
207 --- linux-2.6.22-510/net/core/skbuff.c  2007-07-08 19:32:17.000000000 -0400
208 +++ linux-2.6.22-520/net/core/skbuff.c  2008-06-06 17:07:56.000000000 -0400
209 @@ -56,6 +56,7 @@
210  #include <linux/rtnetlink.h>
211  #include <linux/init.h>
212  #include <linux/scatterlist.h>
213 +#include <linux/vs_network.h>
214  
215  #include <net/protocol.h>
216  #include <net/dst.h>
217 @@ -174,6 +175,7 @@
218         skb->data = data;
219         skb_reset_tail_pointer(skb);
220         skb->end = skb->tail + size;
221 +       if (!in_interrupt()) skb->skb_tag = nx_current_nid(); else skb->skb_tag = 0;
222         /* make sure we initialize shinfo sequentially */
223         shinfo = skb_shinfo(skb);
224         atomic_set(&shinfo->dataref, 1);
225 @@ -443,6 +445,8 @@
226         C(tail);
227         C(end);
228  
229 +       /* Sapan: Cloned skbs aren't owned by anyone. Let the cloner decide who it belongs to. */
230 +
231         atomic_inc(&(skb_shinfo(skb)->dataref));
232         skb->cloned = 1;
233  
234 @@ -492,6 +496,7 @@
235         new->tc_index   = old->tc_index;
236  #endif
237         skb_copy_secmark(new, old);
238 +       new->skb_tag = old->skb_tag;
239         atomic_set(&new->users, 1);
240         skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
241         skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
242 diff -Nurb linux-2.6.22-510/net/core/sock.c linux-2.6.22-520/net/core/sock.c
243 --- linux-2.6.22-510/net/core/sock.c    2008-06-06 17:07:48.000000000 -0400
244 +++ linux-2.6.22-520/net/core/sock.c    2008-06-06 17:07:56.000000000 -0400
245 @@ -444,6 +444,19 @@
246                 }
247                 goto set_sndbuf;
248  
249 +       case SO_SETXID:
250 +               if (current_vx_info()) {
251 +                       ret = -EPERM;
252 +                       break;
253 +               }
254 +               if (val < 0 || val > MAX_S_CONTEXT) {
255 +                       ret = -EINVAL;
256 +                       break;
257 +               }
258 +               sk->sk_xid = val;
259 +               sk->sk_nid = val;
260 +               break;
261 +
262         case SO_RCVBUF:
263                 /* Don't error on this BSD doesn't and if you think
264                    about it this is right. Otherwise apps have to
265 @@ -573,7 +586,7 @@
266                 char devname[IFNAMSIZ];
267  
268                 /* Sorry... */
269 -               if (!capable(CAP_NET_RAW)) {
270 +               if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
271                         ret = -EPERM;
272                         break;
273                 }
274 diff -Nurb linux-2.6.22-510/net/ipv4/af_inet.c linux-2.6.22-520/net/ipv4/af_inet.c
275 --- linux-2.6.22-510/net/ipv4/af_inet.c 2008-06-06 17:07:48.000000000 -0400
276 +++ linux-2.6.22-520/net/ipv4/af_inet.c 2008-06-06 17:07:56.000000000 -0400
277 @@ -178,6 +178,8 @@
278                         return -EAGAIN;
279                 }
280                 inet->sport = htons(inet->num);
281 +               sk->sk_xid = vx_current_xid();
282 +               if (!in_interrupt()) sk->sk_nid = nx_current_nid(); else sk->sk_nid=0;
283         }
284         release_sock(sk);
285         return 0;
286 @@ -312,6 +314,9 @@
287         if ((protocol == IPPROTO_ICMP) &&
288                 nx_capable(answer->capability, NXC_RAW_ICMP))
289                 goto override;
290 +       if (sock->type == SOCK_RAW &&
291 +               nx_capable(answer->capability, NXC_RAW_SOCKET))
292 +               goto override;
293         if (answer->capability > 0 && !capable(answer->capability))
294                 goto out_rcu_unlock;
295  override:
296 diff -Nurb linux-2.6.22-510/net/ipv4/icmp.c linux-2.6.22-520/net/ipv4/icmp.c
297 --- linux-2.6.22-510/net/ipv4/icmp.c    2008-06-06 17:07:55.000000000 -0400
298 +++ linux-2.6.22-520/net/ipv4/icmp.c    2008-06-06 17:07:56.000000000 -0400
299 @@ -709,7 +709,7 @@
300         if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
301                 while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
302                                                  iph->saddr,
303 -                                                skb->dev->ifindex)) != NULL) {
304 +                                                skb->dev->ifindex, skb->skb_tag)) != NULL) {
305                         raw_err(raw_sk, skb, info);
306                         raw_sk = sk_next(raw_sk);
307                         iph = (struct iphdr *)skb->data;
308 diff -Nurb linux-2.6.22-510/net/ipv4/ip_options.c linux-2.6.22-520/net/ipv4/ip_options.c
309 --- linux-2.6.22-510/net/ipv4/ip_options.c      2007-07-08 19:32:17.000000000 -0400
310 +++ linux-2.6.22-520/net/ipv4/ip_options.c      2008-06-06 17:07:56.000000000 -0400
311 @@ -409,7 +409,7 @@
312                                         optptr[2] += 8;
313                                         break;
314                                       default:
315 -                                       if (!skb && !capable(CAP_NET_RAW)) {
316 +                                       if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
317                                                 pp_ptr = optptr + 3;
318                                                 goto error;
319                                         }
320 @@ -445,7 +445,7 @@
321                                 opt->router_alert = optptr - iph;
322                         break;
323                       case IPOPT_CIPSO:
324 -                       if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) {
325 +                       if ((!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) || opt->cipso) {
326                                 pp_ptr = optptr;
327                                 goto error;
328                         }
329 @@ -458,7 +458,7 @@
330                       case IPOPT_SEC:
331                       case IPOPT_SID:
332                       default:
333 -                       if (!skb && !capable(CAP_NET_RAW)) {
334 +                       if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
335                                 pp_ptr = optptr;
336                                 goto error;
337                         }
338 diff -Nurb linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c
339 --- linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c       2008-06-06 17:07:43.000000000 -0400
340 +++ linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c       2008-06-06 17:07:56.000000000 -0400
341 @@ -49,6 +49,8 @@
342         else
343                 logflags = NF_LOG_MASK;
344  
345 +       printk("TAG=%d ", skb->skb_tag);
346 +
347         ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
348         if (ih == NULL) {
349                 printk("TRUNCATED");
350 diff -Nurb linux-2.6.22-510/net/ipv4/raw.c linux-2.6.22-520/net/ipv4/raw.c
351 --- linux-2.6.22-510/net/ipv4/raw.c     2008-06-06 17:07:48.000000000 -0400
352 +++ linux-2.6.22-520/net/ipv4/raw.c     2008-06-06 17:07:56.000000000 -0400
353 @@ -103,7 +103,7 @@
354  
355  struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
356                              __be32 raddr, __be32 laddr,
357 -                            int dif)
358 +                            int dif, int tag)
359  {
360         struct hlist_node *node;
361  
362 @@ -112,6 +112,7 @@
363  
364                 if (inet->num == num                                    &&
365                     !(inet->daddr && inet->daddr != raddr)              &&
366 +                   (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag)  &&
367                     v4_sock_addr_match(sk->sk_nx_info, inet, laddr)     &&
368                     !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
369                         goto found; /* gotcha */
370 @@ -161,7 +162,7 @@
371                 goto out;
372         sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
373                              iph->saddr, iph->daddr,
374 -                            skb->dev->ifindex);
375 +                            skb->dev->ifindex, skb->skb_tag);
376  
377         while (sk) {
378                 delivered = 1;
379 @@ -174,7 +175,7 @@
380                 }
381                 sk = __raw_v4_lookup(sk_next(sk), iph->protocol,
382                                      iph->saddr, iph->daddr,
383 -                                    skb->dev->ifindex);
384 +                                    skb->dev->ifindex, skb->skb_tag);
385         }
386  out:
387         read_unlock(&raw_v4_lock);
388 @@ -315,7 +316,7 @@
389         }
390  
391         err = -EPERM;
392 -       if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
393 +       if (!nx_check(0, VS_ADMIN) && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET) &&
394                 sk->sk_nx_info &&
395                 !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
396                 goto error_free;
397 diff -Nurb linux-2.6.22-510/net/netfilter/Kconfig linux-2.6.22-520/net/netfilter/Kconfig
398 --- linux-2.6.22-510/net/netfilter/Kconfig      2007-07-08 19:32:17.000000000 -0400
399 +++ linux-2.6.22-520/net/netfilter/Kconfig      2008-06-06 17:07:56.000000000 -0400
400 @@ -389,6 +389,13 @@
401  
402           To compile it as a module, choose M here.  If unsure, say N.
403  
404 +config NETFILTER_XT_TARGET_SETXID
405 +       tristate '"SETXID" target support'
406 +       depends on NETFILTER_XTABLES
407 +       help
408 +         This option adds a `SETXID' target, which allows you to set the
409 +         xid tag carried by a packet (skb->skb_tag).
410 +
411  config NETFILTER_XT_MATCH_COMMENT
412         tristate  '"comment" match support'
413         depends on NETFILTER_XTABLES
414 diff -Nurb linux-2.6.22-510/net/netfilter/Makefile linux-2.6.22-520/net/netfilter/Makefile
415 --- linux-2.6.22-510/net/netfilter/Makefile     2007-07-08 19:32:17.000000000 -0400
416 +++ linux-2.6.22-520/net/netfilter/Makefile     2008-06-06 17:07:56.000000000 -0400
417 @@ -37,6 +37,7 @@
418  obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
419  
420  # targets
421 +obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o
422  obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
423  obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
424  obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
425 diff -Nurb linux-2.6.22-510/net/netfilter/nf_conntrack_core.c linux-2.6.22-520/net/netfilter/nf_conntrack_core.c
426 --- linux-2.6.22-510/net/netfilter/nf_conntrack_core.c  2007-07-08 19:32:17.000000000 -0400
427 +++ linux-2.6.22-520/net/netfilter/nf_conntrack_core.c  2008-06-06 17:07:56.000000000 -0400
428 @@ -726,6 +726,8 @@
429  
430         /* Overload tuple linked list to put us in unconfirmed list. */
431         list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
432 +       conntrack->xid[IP_CT_DIR_ORIGINAL] = -1;
433 +       conntrack->xid[IP_CT_DIR_REPLY] = -1;
434  
435         write_unlock_bh(&nf_conntrack_lock);
436  
437 diff -Nurb linux-2.6.22-510/net/netfilter/xt_MARK.c linux-2.6.22-520/net/netfilter/xt_MARK.c
438 --- linux-2.6.22-510/net/netfilter/xt_MARK.c    2007-07-08 19:32:17.000000000 -0400
439 +++ linux-2.6.22-520/net/netfilter/xt_MARK.c    2008-06-07 17:55:26.000000000 -0400
440 @@ -5,13 +5,18 @@
441   * This program is free software; you can redistribute it and/or modify
442   * it under the terms of the GNU General Public License version 2 as
443   * published by the Free Software Foundation.
444 + *
445   */
446  
447  #include <linux/module.h>
448 +#include <linux/version.h>
449  #include <linux/skbuff.h>
450  #include <linux/ip.h>
451  #include <net/checksum.h>
452 +#include <net/route.h>
453 +#include <net/inet_hashtables.h>
454  
455 +#include <net/netfilter/nf_conntrack.h>
456  #include <linux/netfilter/x_tables.h>
457  #include <linux/netfilter/xt_MARK.h>
458  
459 @@ -21,6 +26,48 @@
460  MODULE_ALIAS("ipt_MARK");
461  MODULE_ALIAS("ip6t_MARK");
462  
463 +static inline u_int16_t
464 +get_dst_port(struct nf_conntrack_tuple *tuple)
465 +{
466 +       switch (tuple->dst.protonum) {
467 +       case IPPROTO_GRE:
468 +               /* XXX Truncate 32-bit GRE key to 16 bits (pre-2.6.11 branch only) */
469 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
470 +               return tuple->dst.u.gre.key;
471 +#else
472 +               return htons(ntohl(tuple->dst.u.gre.key));
473 +#endif  
474 +       case IPPROTO_ICMP:
475 +               /* Bind on ICMP echo ID; note it is read from the src side of the tuple */
476 +               return tuple->src.u.icmp.id;
477 +       case IPPROTO_TCP:
478 +               return tuple->dst.u.tcp.port;
479 +       case IPPROTO_UDP:
480 +               return tuple->dst.u.udp.port;
481 +       default:
482 +               /* Any other protocol: generic 16-bit dst identifier */
483 +               return tuple->dst.u.all;
484 +       }
485 +}
485 +/* Source-side port (or per-protocol ID) of a conntrack tuple, chosen by dst.protonum. */
486 +static inline u_int16_t
487 +get_src_port(struct nf_conntrack_tuple *tuple)
488 +{
489 +       switch (tuple->dst.protonum) {
490 +       case IPPROTO_GRE:
491 +               /* Return the key as-is, matching get_dst_port() on >= 2.6.11 */
492 +               return tuple->src.u.gre.key;
493 +       case IPPROTO_ICMP:
494 +               /* Bind on ICMP echo ID */
495 +               return tuple->src.u.icmp.id;
496 +       case IPPROTO_TCP:
497 +               return tuple->src.u.tcp.port;
498 +       case IPPROTO_UDP:
499 +               return tuple->src.u.udp.port;
500 +       default:
501 +               return tuple->src.u.all;
502 +       }
503 +}
504 +
505  static unsigned int
506  target_v0(struct sk_buff **pskb,
507           const struct net_device *in,
508 @@ -35,6 +82,8 @@
509         return XT_CONTINUE;
510  }
511  
512 +DECLARE_PER_CPU(int, sknid_elevator);      /* defined in net/core/dev.c */
513 +
514  static unsigned int
515  target_v1(struct sk_buff **pskb,
516           const struct net_device *in,
517 @@ -44,7 +93,20 @@
518           const void *targinfo)
519  {
520         const struct xt_mark_target_info_v1 *markinfo = targinfo;
521 -       int mark = 0;
522 +       enum ip_conntrack_info ctinfo;
523 +      struct sock *connection_sk;
524 +      int dif;
525 +      struct nf_conn *ct;
526 +      extern struct inet_hashinfo tcp_hashinfo;
527 +      enum ip_conntrack_dir dir;
528 +      int *curtag;
529 +      u_int32_t src_ip;
530 +      u_int32_t dst_ip;
531 +      u_int16_t proto, src_port;
532 +      u_int32_t ip;
533 +      u_int16_t port;
534 +
535 +       int mark = -1;
536  
537         switch (markinfo->mode) {
538         case XT_MARK_SET:
539 @@ -58,13 +120,74 @@
540         case XT_MARK_OR:
541                 mark = (*pskb)->mark | markinfo->mark;
542                 break;
543 +
544 +               case XT_MARK_COPYXID: 
545 +                                             
546 +                                             ct = nf_ct_get((*pskb), &ctinfo);
547 +                                             if (!ct) 
548 +                                                     break;
549 +
550 +                                             dir = CTINFO2DIR(ctinfo);
551 +                                             src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
552 +                                             dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
553 +                                             src_port = get_src_port(&ct->tuplehash[dir].tuple);
554 +                                             proto = ct->tuplehash[dir].tuple.dst.protonum;
555 +
556 +                                             dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
557 +                                             ip = ct->tuplehash[dir].tuple.dst.u3.ip;
558 +                                             port = get_dst_port(&ct->tuplehash[dir].tuple);
559 +
560 +                                             if (proto == 1 || proto == 17) {
561 +                                                     if ((*pskb)->mark>0) /* The packet is marked, it's going out */
562 +                                                     {
563 +                                                               //if (ct->xid[0]>0 && ct->xid[0]!=(*pskb)->mark)
564 +                                                                       /*printk(KERN_CRIT "xt_MARK log: %d/%d/%d/%d\n",ct->xid[0],(*pskb)->mark,hooknum==NF_IP_LOCAL_IN,proto);*/
565 +
566 +                                                               ct->xid[0]=(*pskb)->mark;
567         }
568  
569 +                                                     if (ct->xid[0] > 0) {
570 +                                                             mark = ct->xid[0];
571 +                                                     }
572 +
573 +                                             }
574 +                                             else if (proto == 6) { 
575 +                                                     if ((*pskb)->sk) {
576 +                                                             connection_sk = (*pskb)->sk;
577 +                                                             sock_hold(connection_sk);
578 +                                                     }
579 +                                                     else 
580 +                                                             connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
581 +                                                             
582 +
583 +                                                     if (connection_sk) {
584 +                                                             if (connection_sk->sk_state == TCP_TIME_WAIT) {
585 +                                                                     inet_twsk_put(inet_twsk(connection_sk));
586 +                                                                     break;
587 +                                                             }
588 +                                                             connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[dir];
589 +                                                             ct->xid[!dir]=connection_sk->sk_nid;
590 +                                                             if (connection_sk->sk_nid != 0) 
591 +                                                                     mark = connection_sk->sk_nid;
592 +                                                             sock_put(connection_sk);
593 +                                                     }
594 +                                                     else 
595 +                                                             mark = -1 ; 
596 +                                             }
597 +                                             break;
598 +       }
599 +       if (mark != -1) {
600         (*pskb)->mark = mark;
601 +       }
602 +
603 +       curtag=&__get_cpu_var(sknid_elevator);
604 +       if (mark > 0 && *curtag==-2) 
605 +       {
606 +               *curtag = mark;
607 +       }
608         return XT_CONTINUE;
609  }
610  
611 -
612  static int
613  checkentry_v0(const char *tablename,
614               const void *entry,
615 @@ -92,7 +215,8 @@
616  
617         if (markinfo->mode != XT_MARK_SET
618             && markinfo->mode != XT_MARK_AND
619 -           && markinfo->mode != XT_MARK_OR) {
620 +           && markinfo->mode != XT_MARK_OR
621 +           && markinfo->mode != XT_MARK_COPYXID) {
622                 printk(KERN_WARNING "MARK: unknown mode %u\n",
623                        markinfo->mode);
624                 return 0;
625 diff -Nurb linux-2.6.22-510/net/netfilter/xt_SETXID.c linux-2.6.22-520/net/netfilter/xt_SETXID.c
626 --- linux-2.6.22-510/net/netfilter/xt_SETXID.c  1969-12-31 19:00:00.000000000 -0500
627 +++ linux-2.6.22-520/net/netfilter/xt_SETXID.c  2008-06-06 17:07:56.000000000 -0400
628 @@ -0,0 +1,79 @@
629 +#include <linux/module.h>
630 +#include <linux/skbuff.h>
631 +#include <linux/ip.h>
632 +#include <net/checksum.h>
633 +#include <linux/vs_network.h>
634 +
635 +#include <linux/netfilter/x_tables.h>
636 +#include <linux/netfilter/xt_SETXID.h>
637 +
638 +MODULE_LICENSE("GPL");
639 +MODULE_AUTHOR("");
640 +MODULE_DESCRIPTION("");
641 +MODULE_ALIAS("ipt_SETXID");
642 +
643 +static unsigned int
644 +target_v1(struct sk_buff **pskb,
645 +         const struct net_device *in,
646 +         const struct net_device *out,
647 +         unsigned int hooknum,
648 +         const struct xt_target *target,
649 +         const void *targinfo)
650 +{
651 +       const struct xt_setxid_target_info_v1 *info = targinfo;
652 +
653 +       /* XT_SET_PACKET_XID: copy the rule's value into the packet's
654 +        * tag (skb_tag aliases skb->mark); other modes are ignored. */
655 +       if (info->mode == XT_SET_PACKET_XID)
656 +               (*pskb)->skb_tag = info->mark;
657 +
658 +       return XT_CONTINUE;
659 +}
660 +
661 +
662 +static int
663 +checkentry_v1(const char *tablename,
664 +             const void *entry,
665 +             const struct xt_target *target,
666 +             void *targinfo,
667 +             unsigned int hook_mask)
668 +{
669 +       struct xt_setxid_target_info_v1 *info = targinfo;
670 +
671 +       /* Accept the rule (1) only for the one mode target_v1 handles. */
672 +       if (info->mode == XT_SET_PACKET_XID)
673 +               return 1;
674 +
675 +       printk(KERN_WARNING "SETXID: unknown mode %u\n",
676 +              info->mode);
677 +       return 0;
678 +}
679 +
680 +static struct xt_target xt_setxid_target[] = {
681 +       {
682 +               .name           = "SETXID",
683 +               .family         = AF_INET,    /* IPv4 */
684 +               .revision       = 1,          /* pairs with xt_setxid_target_info_v1 */
685 +               .checkentry     = checkentry_v1,
686 +               .target         = target_v1,
687 +               .targetsize     = sizeof(struct xt_setxid_target_info_v1),
688 +               .table          = "mangle",   /* rules using it must live in the mangle table */
689 +               .me             = THIS_MODULE,
690 +       }
691 +};
692 +
693 +/* Register the SETXID target table with x_tables at module load. */
694 +static int __init init(void)
695 +{
696 +       return xt_register_targets(xt_setxid_target,
697 +                                  ARRAY_SIZE(xt_setxid_target));
698 +}
699 +
700 +/* Drop the same table again when the module is removed. */
701 +static void __exit fini(void)
702 +{
703 +       xt_unregister_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target));
704 +}
705 +
706 +module_init(init);
707 +module_exit(fini);
708 diff -Nurb linux-2.6.22-510/net/packet/af_packet.c linux-2.6.22-520/net/packet/af_packet.c
709 --- linux-2.6.22-510/net/packet/af_packet.c     2007-07-08 19:32:17.000000000 -0400
710 +++ linux-2.6.22-520/net/packet/af_packet.c     2008-06-07 18:30:41.000000000 -0400
711 @@ -78,6 +78,7 @@
712  #include <linux/poll.h>
713  #include <linux/module.h>
714  #include <linux/init.h>
715 +#include <linux/vs_network.h>
716  
717  #ifdef CONFIG_INET
718  #include <net/inet_common.h>
719 @@ -246,10 +247,13 @@
720  
721  static const struct proto_ops packet_ops_spkt;
722  
723 +DECLARE_PER_CPU(int, sknid_elevator);      /* defined in net/core/dev.c */
724  static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
725  {
726         struct sock *sk;
727         struct sockaddr_pkt *spkt;
728 +       int tag = skb->skb_tag;
729 +       int *elevator=&__get_cpu_var(sknid_elevator);
730  
731         /*
732          *      When we registered the protocol we saved the socket in the data
733 @@ -269,6 +273,22 @@
734          *      so that this procedure is noop.
735          */
736  
737 +       /* 
738 +        * (18:05:41) daniel_hozac: where?
739 +        * (18:05:58) daniel_hozac: we already have filters on PF_PACKET, don't we?
740 +        * (18:05:58) er: in packet_rcv_skpt
741 +        * (18:07:33) daniel_hozac: oh, that's evil. 
742 +        */
743 +
744 +       if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
745 +               *elevator=-2;
746 +               goto out;
747 +       }
748 +       else if (!sk->sk_nx_info && *elevator) {
749 +               /* Root has already seen this packet */
750 +               goto out;
751 +       }
752 +
753         if (skb->pkt_type == PACKET_LOOPBACK)
754                 goto out;
755  
756 @@ -324,6 +344,9 @@
757         __be16 proto=0;
758         int err;
759  
760 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
761 +               return -EPERM;
762 +
763         /*
764          *      Get and verify the address.
765          */
766 @@ -420,6 +443,17 @@
767                                       unsigned int res)
768  {
769         struct sk_filter *filter;
770 +       int tag = skb->skb_tag;
771 +       int *elevator=&__get_cpu_var(sknid_elevator);
772 +
773 +       if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
774 +               *elevator=-2;
775 +               return 0;
776 +       }
777 +       else if (!sk->sk_nx_info && *elevator) {
778 +               /* Root has already seen this packet */
779 +               return 0;
780 +       }
781  
782         rcu_read_lock_bh();
783         filter = rcu_dereference(sk->sk_filter);
784 @@ -711,6 +745,9 @@
785         unsigned char *addr;
786         int ifindex, err, reserve = 0;
787  
788 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) 
789 +               return -EPERM;
790 +
791         /*
792          *      Get and verify the address.
793          */
794 @@ -984,8 +1021,9 @@
795         __be16 proto = (__force __be16)protocol; /* weird, but documented */
796         int err;
797  
798 -       if (!capable(CAP_NET_RAW))
799 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET))
800                 return -EPERM;
801 +               
802         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
803             sock->type != SOCK_PACKET)
804                 return -ESOCKTNOSUPPORT;