Fix for making ping work in slices
[linux-2.6.git] / linux-2.6-520-vnet+.patch
1 diff -Nurb linux-2.6.22-510/include/linux/netfilter/xt_MARK.h linux-2.6.22-520/include/linux/netfilter/xt_MARK.h
2 --- linux-2.6.22-510/include/linux/netfilter/xt_MARK.h  2007-07-08 19:32:17.000000000 -0400
3 +++ linux-2.6.22-520/include/linux/netfilter/xt_MARK.h  2008-02-20 04:13:12.000000000 -0500
4 @@ -11,6 +11,7 @@
5         XT_MARK_SET=0,
6         XT_MARK_AND,
7         XT_MARK_OR,
8 +       XT_MARK_COPYXID,
9  };
10  
11  struct xt_mark_target_info_v1 {
12 diff -Nurb linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h
13 --- linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h        1969-12-31 19:00:00.000000000 -0500
14 +++ linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h        2008-02-20 04:13:12.000000000 -0500
15 @@ -0,0 +1,14 @@
16 +#ifndef _XT_SETXID_H_target
17 +#define _XT_SETXID_H_target
18 +
19 +/* Version 1: values accepted in xt_setxid_target_info_v1.mode */
20 +enum {
21 +       XT_SET_PACKET_XID=0     /* copy .mark into the packet's skb_tag */
22 +};
23 +
24 +struct xt_setxid_target_info_v1 {
25 +       unsigned long mark;     /* tag value written by XT_SET_PACKET_XID */
26 +       u_int8_t mode;          /* one of the XT_SET_* modes above */
27 +};
28 +
29 +#endif /*_XT_SETXID_H_target*/
30 diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h
31 --- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h    2007-07-08 19:32:17.000000000 -0400
32 +++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h    2008-02-20 04:13:12.000000000 -0500
33 @@ -12,6 +12,7 @@
34  #define IPT_MARK_SET   XT_MARK_SET
35  #define IPT_MARK_AND   XT_MARK_AND
36  #define        IPT_MARK_OR     XT_MARK_OR
37 +#define IPT_MARK_COPYXID       XT_MARK_COPYXID
38  
39  #define ipt_mark_target_info_v1 xt_mark_target_info_v1
40  
41 diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h
42 --- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h  1969-12-31 19:00:00.000000000 -0500
43 +++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h  2008-02-20 04:13:12.000000000 -0500
44 @@ -0,0 +1,13 @@
45 +#ifndef _IPT_SETXID_H_target
46 +#define _IPT_SETXID_H_target
47 +
48 +/* Backwards compatibility for old userspace */
49 +
50 +#include <linux/netfilter/xt_SETXID.h>
51 +
52 +/* Version 1 */
53 +#define IPT_SET_PACKET_XID     XT_SET_PACKET_XID
54 +
55 +#define ipt_setxid_target_info_v1 xt_setxid_target_info_v1
56 +
57 +#endif /*_IPT_SETXID_H_target*/
58 diff -Nurb linux-2.6.22-510/include/linux/skbuff.h linux-2.6.22-520/include/linux/skbuff.h
59 --- linux-2.6.22-510/include/linux/skbuff.h     2007-07-08 19:32:17.000000000 -0400
60 +++ linux-2.6.22-520/include/linux/skbuff.h     2008-02-20 04:13:12.000000000 -0500
61 @@ -302,6 +302,7 @@
62  #endif
63  
64         __u32                   mark;
65 +#define skb_tag                        mark
66  
67         sk_buff_data_t          transport_header;
68         sk_buff_data_t          network_header;
69 diff -Nurb linux-2.6.22-510/include/linux/socket.h linux-2.6.22-520/include/linux/socket.h
70 --- linux-2.6.22-510/include/linux/socket.h     2007-07-08 19:32:17.000000000 -0400
71 +++ linux-2.6.22-520/include/linux/socket.h     2008-02-20 04:13:12.000000000 -0500
72 @@ -288,6 +288,8 @@
73  #define SOL_TIPC       271
74  #define SOL_RXRPC      272
75  
76 +#define SO_SETXID      SO_PEERCRED
77 +
78  /* IPX options */
79  #define IPX_TYPE       1
80  
81 diff -Nurb linux-2.6.22-510/include/linux/vserver/network.h linux-2.6.22-520/include/linux/vserver/network.h
82 --- linux-2.6.22-510/include/linux/vserver/network.h    2008-02-20 04:13:10.000000000 -0500
83 +++ linux-2.6.22-520/include/linux/vserver/network.h    2008-02-20 04:13:12.000000000 -0500
84 @@ -45,6 +45,8 @@
85  /* network caps */
86  
87  #define NXC_RAW_ICMP           0x00000100
88 +#define NXC_RAW_SOCKET         0x00000200
89 +#define NXC_RAW_SEND           0x00000400
90  
91  
92  /* address types */
93 diff -Nurb linux-2.6.22-510/include/linux/vserver/network.h.orig linux-2.6.22-520/include/linux/vserver/network.h.orig
94 --- linux-2.6.22-510/include/linux/vserver/network.h.orig       1969-12-31 19:00:00.000000000 -0500
95 +++ linux-2.6.22-520/include/linux/vserver/network.h.orig       2008-02-20 04:13:12.000000000 -0500
96 @@ -0,0 +1,144 @@
97 +#ifndef _VX_NETWORK_H
98 +#define _VX_NETWORK_H
99 +
100 +#include <linux/types.h>
101 +
102 +
103 +#define MAX_N_CONTEXT  65535   /* Arbitrary limit */
104 +
105 +
106 +/* network flags */
107 +
108 +#define NXF_INFO_PRIVATE       0x00000008
109 +
110 +#define NXF_SINGLE_IP          0x00000100
111 +#define NXF_LBACK_REMAP                0x00000200
112 +#define NXF_LBACK_ALLOW                0x00000400
113 +
114 +#define NXF_HIDE_NETIF         0x02000000
115 +#define NXF_HIDE_LBACK         0x04000000
116 +
117 +#define NXF_STATE_SETUP                (1ULL << 32)
118 +#define NXF_STATE_ADMIN                (1ULL << 34)
119 +
120 +#define NXF_SC_HELPER          (1ULL << 36)
121 +#define NXF_PERSISTENT         (1ULL << 38)
122 +
123 +#define NXF_ONE_TIME           (0x0005ULL << 32)
124 +
125 +
126 +#define        NXF_INIT_SET            (__nxf_init_set())
127 +
128 +static inline uint64_t __nxf_init_set(void) { /* value behind the NXF_INIT_SET macro */
129 +       return    NXF_STATE_ADMIN        /* always included */
130 +#ifdef CONFIG_VSERVER_AUTO_LBACK
131 +               | NXF_LBACK_REMAP
132 +               | NXF_HIDE_LBACK
133 +#endif
134 +#ifdef CONFIG_VSERVER_AUTO_SINGLE
135 +               | NXF_SINGLE_IP
136 +#endif
137 +               | NXF_HIDE_NETIF;        /* always included */
138 +}
139 +
140 +
141 +/* network caps */
142 +
143 +#define NXC_RAW_ICMP           0x00000100
144 +
145 +
146 +/* address types */
147 +
148 +#define NXA_TYPE_IPV4          0x0001
149 +#define NXA_TYPE_IPV6          0x0002
150 +
151 +#define NXA_TYPE_NONE          0x0000
152 +#define NXA_TYPE_ANY           0x00FF
153 +
154 +#define NXA_TYPE_ADDR          0x0010
155 +#define NXA_TYPE_MASK          0x0020
156 +#define NXA_TYPE_RANGE         0x0040
157 +
158 +#define NXA_MASK_ALL           (NXA_TYPE_ADDR | NXA_TYPE_MASK | NXA_TYPE_RANGE)
159 +
160 +#define NXA_MOD_BCAST          0x0100
161 +#define NXA_MOD_LBACK          0x0200
162 +
163 +#define NXA_LOOPBACK           0x1000
164 +
165 +#define NXA_MASK_BIND          (NXA_MASK_ALL | NXA_MOD_BCAST | NXA_MOD_LBACK)
166 +#define NXA_MASK_SHOW          (NXA_MASK_ALL | NXA_LOOPBACK)
167 +
168 +#ifdef __KERNEL__
169 +
170 +#include <linux/list.h>
171 +#include <linux/spinlock.h>
172 +#include <linux/rcupdate.h>
173 +#include <linux/in.h>
174 +#include <linux/in6.h>
175 +#include <asm/atomic.h>
176 +
177 +struct nx_addr_v4 {
178 +       struct nx_addr_v4 *next;
179 +       struct in_addr ip[2];
180 +       struct in_addr mask;
181 +       uint16_t type;
182 +       uint16_t flags;
183 +};
184 +
185 +struct nx_addr_v6 {
186 +       struct nx_addr_v6 *next;
187 +       struct in6_addr ip;
188 +       struct in6_addr mask;
189 +       uint32_t prefix;
190 +       uint16_t type;
191 +       uint16_t flags;
192 +};
193 +
194 +struct nx_info {
195 +       struct hlist_node nx_hlist;     /* linked list of nxinfos */
196 +       nid_t nx_id;                    /* vnet id */
197 +       atomic_t nx_usecnt;             /* usage count */
198 +       atomic_t nx_tasks;              /* tasks count */
199 +       int nx_state;                   /* context state */
200 +
201 +       uint64_t nx_flags;              /* network flag word */
202 +       uint64_t nx_ncaps;              /* network capabilities */
203 +
204 +       struct in_addr v4_lback;        /* Loopback address */
205 +       struct in_addr v4_bcast;        /* Broadcast address */
206 +       struct nx_addr_v4 v4;           /* First/Single ipv4 address */
207 +#ifdef CONFIG_IPV6
208 +       struct nx_addr_v6 v6;           /* First/Single ipv6 address */
209 +#endif
210 +       char nx_name[65];               /* network context name */
211 +};
212 +
213 +
214 +/* status flags */
215 +
216 +#define NXS_HASHED      0x0001
217 +#define NXS_SHUTDOWN    0x0100
218 +#define NXS_RELEASED    0x8000
219 +
220 +extern struct nx_info *lookup_nx_info(int);
221 +
222 +extern int get_nid_list(int, unsigned int *, int);
223 +extern int nid_is_hashed(nid_t);
224 +
225 +extern int nx_migrate_task(struct task_struct *, struct nx_info *);
226 +
227 +extern long vs_net_change(struct nx_info *, unsigned int);
228 +
229 +struct sock;
230 +
231 +
232 +#define NX_IPV4(n)     ((n)->v4.type != NXA_TYPE_NONE)
233 +#ifdef  CONFIG_IPV6
234 +#define NX_IPV6(n)     ((n)->v6.type != NXA_TYPE_NONE)
235 +#else
236 +#define NX_IPV6(n)     (0)
237 +#endif
238 +
239 +#endif /* __KERNEL__ */
240 +#endif /* _VX_NETWORK_H */
241 diff -Nurb linux-2.6.22-510/include/net/netfilter/nf_conntrack.h linux-2.6.22-520/include/net/netfilter/nf_conntrack.h
242 --- linux-2.6.22-510/include/net/netfilter/nf_conntrack.h       2007-07-08 19:32:17.000000000 -0400
243 +++ linux-2.6.22-520/include/net/netfilter/nf_conntrack.h       2008-02-20 04:13:12.000000000 -0500
244 @@ -131,6 +131,9 @@
245         /* Storage reserved for other modules: */
246         union nf_conntrack_proto proto;
247  
248 +       /* PLANETLAB. VNET-specific */
249 +       xid_t xid[IP_CT_DIR_MAX];
250 +
251         /* features dynamically at the end: helper, nat (both optional) */
252         char data[0];
253  };
254 diff -Nurb linux-2.6.22-510/include/net/raw.h linux-2.6.22-520/include/net/raw.h
255 --- linux-2.6.22-510/include/net/raw.h  2007-07-08 19:32:17.000000000 -0400
256 +++ linux-2.6.22-520/include/net/raw.h  2008-02-20 04:13:12.000000000 -0500
257 @@ -36,7 +36,7 @@
258  
259  extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
260                                     __be32 raddr, __be32 laddr,
261 -                                   int dif);
262 +                                   int dif, int tag);
263  
264  extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
265  
266 diff -Nurb linux-2.6.22-510/net/core/skbuff.c linux-2.6.22-520/net/core/skbuff.c
267 --- linux-2.6.22-510/net/core/skbuff.c  2007-07-08 19:32:17.000000000 -0400
268 +++ linux-2.6.22-520/net/core/skbuff.c  2008-02-20 04:13:12.000000000 -0500
269 @@ -56,6 +56,7 @@
270  #include <linux/rtnetlink.h>
271  #include <linux/init.h>
272  #include <linux/scatterlist.h>
273 +#include <linux/vs_network.h>
274  
275  #include <net/protocol.h>
276  #include <net/dst.h>
277 @@ -174,6 +175,7 @@
278         skb->data = data;
279         skb_reset_tail_pointer(skb);
280         skb->end = skb->tail + size;
281 +       skb->skb_tag = nx_current_nid();
282         /* make sure we initialize shinfo sequentially */
283         shinfo = skb_shinfo(skb);
284         atomic_set(&shinfo->dataref, 1);
285 @@ -443,6 +445,8 @@
286         C(tail);
287         C(end);
288  
289 +       /* Sapan: Cloned skbs aren't owned by anyone. Let the cloner decide who it belongs to. */
290 +
291         atomic_inc(&(skb_shinfo(skb)->dataref));
292         skb->cloned = 1;
293  
294 @@ -492,6 +496,7 @@
295         new->tc_index   = old->tc_index;
296  #endif
297         skb_copy_secmark(new, old);
298 +       new->skb_tag = old->skb_tag;
299         atomic_set(&new->users, 1);
300         skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
301         skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
302 diff -Nurb linux-2.6.22-510/net/core/sock.c linux-2.6.22-520/net/core/sock.c
303 --- linux-2.6.22-510/net/core/sock.c    2008-02-20 04:13:09.000000000 -0500
304 +++ linux-2.6.22-520/net/core/sock.c    2008-02-20 04:13:12.000000000 -0500
305 @@ -444,6 +444,19 @@
306                 }
307                 goto set_sndbuf;
308  
309 +       case SO_SETXID:
310 +               if (current_vx_info()) {
311 +                       ret = -EPERM;
312 +                       break;
313 +               }
314 +               if (val < 0 || val > MAX_S_CONTEXT) {
315 +                       ret = -EINVAL;
316 +                       break;
317 +               }
318 +               sk->sk_xid = val;
319 +               sk->sk_nid = val;
320 +               break;
321 +
322         case SO_RCVBUF:
323                 /* Don't error on this BSD doesn't and if you think
324                    about it this is right. Otherwise apps have to
325 @@ -573,7 +586,7 @@
326                 char devname[IFNAMSIZ];
327  
328                 /* Sorry... */
329 -               if (!capable(CAP_NET_RAW)) {
330 +               if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
331                         ret = -EPERM;
332                         break;
333                 }
334 diff -Nurb linux-2.6.22-510/net/ipv4/af_inet.c linux-2.6.22-520/net/ipv4/af_inet.c
335 --- linux-2.6.22-510/net/ipv4/af_inet.c 2008-02-20 04:13:09.000000000 -0500
336 +++ linux-2.6.22-520/net/ipv4/af_inet.c 2008-02-20 04:13:12.000000000 -0500
337 @@ -178,6 +178,8 @@
338                         return -EAGAIN;
339                 }
340                 inet->sport = htons(inet->num);
341 +               sk->sk_xid = vx_current_xid();
342 +               sk->sk_nid = nx_current_nid();
343         }
344         release_sock(sk);
345         return 0;
346 @@ -312,6 +314,9 @@
347         if ((protocol == IPPROTO_ICMP) &&
348                 nx_capable(answer->capability, NXC_RAW_ICMP))
349                 goto override;
350 +       if (sock->type == SOCK_RAW &&
351 +               nx_capable(answer->capability, NXC_RAW_SOCKET))
352 +               goto override;
353         if (answer->capability > 0 && !capable(answer->capability))
354                 goto out_rcu_unlock;
355  override:
356 diff -Nurb linux-2.6.22-510/net/ipv4/icmp.c linux-2.6.22-520/net/ipv4/icmp.c
357 --- linux-2.6.22-510/net/ipv4/icmp.c    2008-02-20 04:13:12.000000000 -0500
358 +++ linux-2.6.22-520/net/ipv4/icmp.c    2008-02-20 04:13:12.000000000 -0500
359 @@ -709,7 +709,7 @@
360         if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
361                 while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
362                                                  iph->saddr,
363 -                                                skb->dev->ifindex)) != NULL) {
364 +                                                skb->dev->ifindex, skb->skb_tag)) != NULL) {
365                         raw_err(raw_sk, skb, info);
366                         raw_sk = sk_next(raw_sk);
367                         iph = (struct iphdr *)skb->data;
368 diff -Nurb linux-2.6.22-510/net/ipv4/ip_options.c linux-2.6.22-520/net/ipv4/ip_options.c
369 --- linux-2.6.22-510/net/ipv4/ip_options.c      2007-07-08 19:32:17.000000000 -0400
370 +++ linux-2.6.22-520/net/ipv4/ip_options.c      2008-02-20 04:13:12.000000000 -0500
371 @@ -409,7 +409,7 @@
372                                         optptr[2] += 8;
373                                         break;
374                                       default:
375 -                                       if (!skb && !capable(CAP_NET_RAW)) {
376 +                                       if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
377                                                 pp_ptr = optptr + 3;
378                                                 goto error;
379                                         }
380 @@ -445,7 +445,7 @@
381                                 opt->router_alert = optptr - iph;
382                         break;
383                       case IPOPT_CIPSO:
384 -                       if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) {
385 +                       if ((!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) || opt->cipso) {
386                                 pp_ptr = optptr;
387                                 goto error;
388                         }
389 @@ -458,7 +458,7 @@
390                       case IPOPT_SEC:
391                       case IPOPT_SID:
392                       default:
393 -                       if (!skb && !capable(CAP_NET_RAW)) {
394 +                       if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
395                                 pp_ptr = optptr;
396                                 goto error;
397                         }
398 diff -Nurb linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c
399 --- linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c       2008-02-20 04:13:05.000000000 -0500
400 +++ linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c       2008-02-20 04:13:12.000000000 -0500
401 @@ -49,6 +49,8 @@
402         else
403                 logflags = NF_LOG_MASK;
404  
405 +       printk("TAG=%d ", skb->skb_tag);
406 +
407         ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
408         if (ih == NULL) {
409                 printk("TRUNCATED");
410 diff -Nurb linux-2.6.22-510/net/ipv4/raw.c linux-2.6.22-520/net/ipv4/raw.c
411 --- linux-2.6.22-510/net/ipv4/raw.c     2008-02-20 04:13:09.000000000 -0500
412 +++ linux-2.6.22-520/net/ipv4/raw.c     2008-02-20 04:13:12.000000000 -0500
413 @@ -103,7 +103,7 @@
414  
415  struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
416                              __be32 raddr, __be32 laddr,
417 -                            int dif)
418 +                            int dif, int tag)
419  {
420         struct hlist_node *node;
421  
422 @@ -112,6 +112,7 @@
423  
424                 if (inet->num == num                                    &&
425                     !(inet->daddr && inet->daddr != raddr)              &&
426 +                   (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag)  &&
427                     v4_sock_addr_match(sk->sk_nx_info, inet, laddr)     &&
428                     !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
429                         goto found; /* gotcha */
430 @@ -161,7 +162,7 @@
431                 goto out;
432         sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
433                              iph->saddr, iph->daddr,
434 -                            skb->dev->ifindex);
435 +                            skb->dev->ifindex, skb->skb_tag);
436  
437         while (sk) {
438                 delivered = 1;
439 @@ -174,7 +175,7 @@
440                 }
441                 sk = __raw_v4_lookup(sk_next(sk), iph->protocol,
442                                      iph->saddr, iph->daddr,
443 -                                    skb->dev->ifindex);
444 +                                    skb->dev->ifindex, skb->skb_tag);
445         }
446  out:
447         read_unlock(&raw_v4_lock);
448 @@ -315,7 +316,7 @@
449         }
450  
451         err = -EPERM;
452 -       if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
453 +       if (!nx_check(0, VS_ADMIN) && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET) &&
454                 sk->sk_nx_info &&
455                 !v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
456                 goto error_free;
457 diff -Nurb linux-2.6.22-510/net/netfilter/Kconfig linux-2.6.22-520/net/netfilter/Kconfig
458 --- linux-2.6.22-510/net/netfilter/Kconfig      2007-07-08 19:32:17.000000000 -0400
459 +++ linux-2.6.22-520/net/netfilter/Kconfig      2008-02-20 04:13:12.000000000 -0500
460 @@ -389,6 +389,13 @@
461  
462           To compile it as a module, choose M here.  If unsure, say N.
463  
464 +config NETFILTER_XT_TARGET_SETXID
465 +       tristate '"SETXID" target support'
466 +       depends on NETFILTER_XTABLES
467 +       help
468 +         This option adds a `SETXID' target, which allows you to set the
469 +         xid tag carried by a packet (skb).
470 +
471  config NETFILTER_XT_MATCH_COMMENT
472         tristate  '"comment" match support'
473         depends on NETFILTER_XTABLES
474 diff -Nurb linux-2.6.22-510/net/netfilter/Makefile linux-2.6.22-520/net/netfilter/Makefile
475 --- linux-2.6.22-510/net/netfilter/Makefile     2007-07-08 19:32:17.000000000 -0400
476 +++ linux-2.6.22-520/net/netfilter/Makefile     2008-02-20 04:13:12.000000000 -0500
477 @@ -37,6 +37,7 @@
478  obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
479  
480  # targets
481 +obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o
482  obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
483  obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
484  obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
485 diff -Nurb linux-2.6.22-510/net/netfilter/nf_conntrack_core.c linux-2.6.22-520/net/netfilter/nf_conntrack_core.c
486 --- linux-2.6.22-510/net/netfilter/nf_conntrack_core.c  2007-07-08 19:32:17.000000000 -0400
487 +++ linux-2.6.22-520/net/netfilter/nf_conntrack_core.c  2008-02-20 04:13:12.000000000 -0500
488 @@ -726,6 +726,8 @@
489  
490         /* Overload tuple linked list to put us in unconfirmed list. */
491         list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
492 +       conntrack->xid[IP_CT_DIR_ORIGINAL] = -1;
493 +       conntrack->xid[IP_CT_DIR_REPLY] = -1;
494  
495         write_unlock_bh(&nf_conntrack_lock);
496  
497 diff -Nurb linux-2.6.22-510/net/netfilter/xt_MARK.c linux-2.6.22-520/net/netfilter/xt_MARK.c
498 --- linux-2.6.22-510/net/netfilter/xt_MARK.c    2007-07-08 19:32:17.000000000 -0400
499 +++ linux-2.6.22-520/net/netfilter/xt_MARK.c    2008-02-20 04:13:13.000000000 -0500
500 @@ -5,13 +5,18 @@
501   * This program is free software; you can redistribute it and/or modify
502   * it under the terms of the GNU General Public License version 2 as
503   * published by the Free Software Foundation.
504 + *
505   */
506  
507  #include <linux/module.h>
508 +#include <linux/version.h>
509  #include <linux/skbuff.h>
510  #include <linux/ip.h>
511  #include <net/checksum.h>
512 +#include <net/route.h>
513 +#include <net/inet_hashtables.h>
514  
515 +#include <net/netfilter/nf_conntrack.h>
516  #include <linux/netfilter/x_tables.h>
517  #include <linux/netfilter/xt_MARK.h>
518  
519 @@ -21,6 +26,48 @@
520  MODULE_ALIAS("ipt_MARK");
521  MODULE_ALIAS("ip6t_MARK");
522  
523 +static inline u_int16_t        /* 16-bit destination-side selector of a conntrack tuple */
524 +get_dst_port(struct nf_conntrack_tuple *tuple)
525 +{
526 +       switch (tuple->dst.protonum) {  /* protocol number is read from the dst side */
527 +       case IPPROTO_GRE:
528 +               /* XXX Truncate 32-bit GRE key to 16 bits */
529 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
530 +               return tuple->dst.u.gre.key;
531 +#else
532 +               return htons(ntohl(tuple->dst.u.gre.key));
533 +#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11) */
534 +       case IPPROTO_ICMP:
535 +               /* Bind on ICMP echo ID; note it is read from the src side of the tuple */
536 +               return tuple->src.u.icmp.id;
537 +       case IPPROTO_TCP:
538 +               return tuple->dst.u.tcp.port;
539 +       case IPPROTO_UDP:
540 +               return tuple->dst.u.udp.port;
541 +       default:
542 +               return tuple->dst.u.all;        /* other protocols: generic member of the dst union */
543 +       }
544 +}
545 +
546 +static inline u_int16_t        /* 16-bit source-side selector; counterpart of get_dst_port() */
547 +get_src_port(struct nf_conntrack_tuple *tuple)
548 +{
549 +       switch (tuple->dst.protonum) {  /* protocol number is read from the dst side */
550 +       case IPPROTO_GRE:
551 +               /* Return the key as stored, as get_dst_port() does on >= 2.6.11 */
552 +               return tuple->src.u.gre.key;
553 +       case IPPROTO_ICMP:
554 +               /* Bind on ICMP echo ID */
555 +               return tuple->src.u.icmp.id;
556 +       case IPPROTO_TCP:
557 +               return tuple->src.u.tcp.port;
558 +       case IPPROTO_UDP:
559 +               return tuple->src.u.udp.port;
560 +       default:
561 +               return tuple->src.u.all;        /* other protocols: generic member of the src union */
562 +       }
563 +}
564 +
565  static unsigned int
566  target_v0(struct sk_buff **pskb,
567           const struct net_device *in,
568 @@ -44,7 +91,7 @@
569           const void *targinfo)
570  {
571         const struct xt_mark_target_info_v1 *markinfo = targinfo;
572 -       int mark = 0;
573 +       int mark = -1;
574  
575         switch (markinfo->mode) {
576         case XT_MARK_SET:
577 @@ -58,8 +105,48 @@
578         case XT_MARK_OR:
579                 mark = (*pskb)->mark | markinfo->mark;
580                 break;
581 +
582 +       case XT_MARK_COPYXID: {
583 +               enum ip_conntrack_info ctinfo;
584 +               struct sock *connection_sk;
585 +               int dif;
586 +
587 +               struct nf_conn *ct = nf_ct_get((*pskb), &ctinfo);
588 +               extern struct inet_hashinfo tcp_hashinfo;
589 +               enum ip_conntrack_dir dir;
590 +
591 +               if (!ct) 
592 +                       break;
593 +               dir = CTINFO2DIR(ctinfo);
594 +               u_int32_t src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
595 +               u_int16_t src_port = get_src_port(&ct->tuplehash[dir].tuple);
596 +
597 +               u_int32_t ip;
598 +               u_int16_t port;
599 +
600 +               dif = ((struct rtable *)(*pskb)->dst)->rt_iif;
601 +               ip = ct->tuplehash[dir].tuple.dst.u3.ip;
602 +               port = get_dst_port(&ct->tuplehash[dir].tuple);
603 +
604 +               if ((*pskb)->sk) 
605 +                       connection_sk = (*pskb)->sk;
606 +               else {
607 +                       connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
608 +               }
609 +
610 +               if (connection_sk) {
611 +                       connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[dir];
612 +                       ct->xid[!dir]=connection_sk->sk_xid;
613 +                       if (connection_sk->sk_xid != 0) 
614 +                               mark = connection_sk->sk_xid;
615 +                       if (connection_sk != (*pskb)->sk)
616 +                               sock_put(connection_sk);
617 +               }
618 +               break;
619 +               }
620         }
621  
622 +       if (mark != -1)
623         (*pskb)->mark = mark;
624         return XT_CONTINUE;
625  }
626 @@ -92,7 +179,8 @@
627  
628         if (markinfo->mode != XT_MARK_SET
629             && markinfo->mode != XT_MARK_AND
630 -           && markinfo->mode != XT_MARK_OR) {
631 +           && markinfo->mode != XT_MARK_OR
632 +           && markinfo->mode != XT_MARK_COPYXID) {
633                 printk(KERN_WARNING "MARK: unknown mode %u\n",
634                        markinfo->mode);
635                 return 0;
636 diff -Nurb linux-2.6.22-510/net/netfilter/xt_SETXID.c linux-2.6.22-520/net/netfilter/xt_SETXID.c
637 --- linux-2.6.22-510/net/netfilter/xt_SETXID.c  1969-12-31 19:00:00.000000000 -0500
638 +++ linux-2.6.22-520/net/netfilter/xt_SETXID.c  2008-02-20 04:13:13.000000000 -0500
639 @@ -0,0 +1,79 @@
640 +#include <linux/module.h>
641 +#include <linux/skbuff.h>
642 +#include <linux/ip.h>
643 +#include <net/checksum.h>
644 +#include <linux/vs_network.h>
645 +
646 +#include <linux/netfilter/x_tables.h>
647 +#include <linux/netfilter/xt_SETXID.h>
648 +
649 +MODULE_LICENSE("GPL");
650 +MODULE_AUTHOR("");
651 +MODULE_DESCRIPTION("");
652 +MODULE_ALIAS("ipt_SETXID");
653 +
654 +static unsigned int    /* SETXID v1 target hook; always returns XT_CONTINUE */
655 +target_v1(struct sk_buff **pskb,
656 +         const struct net_device *in,
657 +         const struct net_device *out,
658 +         unsigned int hooknum,
659 +         const struct xt_target *target,
660 +         const void *targinfo) /* read as struct xt_setxid_target_info_v1 */
661 +{
662 +       const struct xt_setxid_target_info_v1 *setxidinfo = targinfo;
663 +
664 +       switch (setxidinfo->mode) {     /* any mode other than XT_SET_PACKET_XID leaves the skb untouched */
665 +       case XT_SET_PACKET_XID:
666 +                (*pskb)->skb_tag = setxidinfo->mark;   /* skb_tag is #defined to skb->mark in this patch's skbuff.h hunk */
667 +               break;
668 +       }
669 +       return XT_CONTINUE;
670 +}
671 +
672 +
673 +static int     /* rule validation for SETXID v1: 1 = accept, 0 = reject */
674 +checkentry_v1(const char *tablename,
675 +             const void *entry,
676 +             const struct xt_target *target,
677 +             void *targinfo,           /* read as struct xt_setxid_target_info_v1 */
678 +             unsigned int hook_mask)
679 +{
680 +       struct xt_setxid_target_info_v1 *setxidinfo = targinfo;
681 +
682 +       if (setxidinfo->mode != XT_SET_PACKET_XID) {    /* the only mode defined in xt_SETXID.h */
683 +               printk(KERN_WARNING "SETXID: unknown mode %u\n",
684 +                      setxidinfo->mode);
685 +               return 0;       /* unknown mode: reject */
686 +       }
687 +
688 +       return 1;
689 +}
690 +
691 +static struct xt_target xt_setxid_target[] = { /* single entry, registered by init() below */
692 +       {
693 +               .name           = "SETXID",
694 +               .family         = AF_INET,      /* no AF_INET6 entry is provided */
695 +               .revision       = 1,
696 +               .checkentry     = checkentry_v1,
697 +               .target         = target_v1,
698 +               .targetsize     = sizeof(struct xt_setxid_target_info_v1),
699 +               .table          = "mangle",
700 +               .me             = THIS_MODULE,
701 +       }
702 +};
703 +
704 +static int __init init(void)   /* module entry point: registers the SETXID target table */
705 +{
706 +       int err;
707 +
708 +       err = xt_register_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target));
709 +       return err;     /* result of xt_register_targets() is passed straight through */
710 +}
711 +
712 +static void __exit fini(void)  /* module exit: unregisters what init() registered */
713 +{
714 +       xt_unregister_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target));
715 +}
716 +
717 +module_init(init);
718 +module_exit(fini);
719 diff -Nurb linux-2.6.22-510/net/packet/af_packet.c linux-2.6.22-520/net/packet/af_packet.c
720 --- linux-2.6.22-510/net/packet/af_packet.c     2007-07-08 19:32:17.000000000 -0400
721 +++ linux-2.6.22-520/net/packet/af_packet.c     2008-02-20 04:13:13.000000000 -0500
722 @@ -78,6 +78,7 @@
723  #include <linux/poll.h>
724  #include <linux/module.h>
725  #include <linux/init.h>
726 +#include <linux/vs_network.h>
727  
728  #ifdef CONFIG_INET
729  #include <net/inet_common.h>
730 @@ -324,6 +325,9 @@
731         __be16 proto=0;
732         int err;
733  
734 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
735 +               return -EPERM;
736 +
737         /*
738          *      Get and verify the address.
739          */
740 @@ -420,6 +424,10 @@
741                                       unsigned int res)
742  {
743         struct sk_filter *filter;
744 +       int tag = skb->skb_tag;
745 +
746 +       if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag))
747 +               return 0;
748  
749         rcu_read_lock_bh();
750         filter = rcu_dereference(sk->sk_filter);
751 @@ -711,6 +719,9 @@
752         unsigned char *addr;
753         int ifindex, err, reserve = 0;
754  
755 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
756 +               return -EPERM;
757 +
758         /*
759          *      Get and verify the address.
760          */
761 @@ -984,7 +995,7 @@
762         __be16 proto = (__force __be16)protocol; /* weird, but documented */
763         int err;
764  
765 -       if (!capable(CAP_NET_RAW))
766 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET))
767                 return -EPERM;
768         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
769             sock->type != SOCK_PACKET)
770 diff -Nurb linux-2.6.22-510/xt_MARK.c linux-2.6.22-520/xt_MARK.c
771 --- linux-2.6.22-510/xt_MARK.c  1969-12-31 19:00:00.000000000 -0500
772 +++ linux-2.6.22-520/xt_MARK.c  2008-02-20 05:14:06.000000000 -0500
773 @@ -0,0 +1,282 @@
774 +/* This is a module which is used for setting the NFMARK field of an skb. */
775 +
776 +/* (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
777 + *
778 + * This program is free software; you can redistribute it and/or modify
779 + * it under the terms of the GNU General Public License version 2 as
780 + * published by the Free Software Foundation.
781 + *
782 + */
783 +
784 +#include <linux/module.h>
785 +#include <linux/version.h>
786 +#include <linux/skbuff.h>
787 +#include <linux/ip.h>
788 +#include <net/checksum.h>
789 +#include <net/route.h>
790 +#include <net/inet_hashtables.h>
791 +
792 +#include <net/netfilter/nf_conntrack.h>
793 +#include <linux/netfilter/x_tables.h>
794 +#include <linux/netfilter/xt_MARK.h>
795 +
796 +MODULE_LICENSE("GPL");
797 +MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
798 +MODULE_DESCRIPTION("ip[6]tables MARK modification module");
799 +MODULE_ALIAS("ipt_MARK");
800 +MODULE_ALIAS("ip6t_MARK");
801 +
802 +static inline u_int16_t        /* 16-bit destination-side selector of a conntrack tuple */
803 +get_dst_port(struct nf_conntrack_tuple *tuple)
804 +{
805 +       switch (tuple->dst.protonum) {  /* protocol number is read from the dst side */
806 +       case IPPROTO_GRE:
807 +               /* XXX Truncate 32-bit GRE key to 16 bits */
808 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
809 +               return tuple->dst.u.gre.key;
810 +#else
811 +               return htons(ntohl(tuple->dst.u.gre.key));
812 +#endif  /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11) */
813 +       case IPPROTO_ICMP:
814 +               /* Bind on ICMP echo ID; note it is read from the src side of the tuple */
815 +               return tuple->src.u.icmp.id;
816 +       case IPPROTO_TCP:
817 +               return tuple->dst.u.tcp.port;
818 +       case IPPROTO_UDP:
819 +               return tuple->dst.u.udp.port;
820 +       default:
821 +               return tuple->dst.u.all;        /* other protocols: generic member of the dst union */
822 +       }
823 +}
824 +
825 +/* Source-side "port" of a conntrack tuple, selected by protocol. */
826 +static inline u_int16_t get_src_port(struct nf_conntrack_tuple *tuple)
827 +{
828 +       switch (tuple->dst.protonum) {
829 +       case IPPROTO_GRE:
830 +               /* Key as stored; same as get_dst_port() on >= 2.6.11. */
831 +               return tuple->src.u.gre.key;
832 +       case IPPROTO_ICMP:
833 +               /* Bind on ICMP echo ID */
834 +               return tuple->src.u.icmp.id;
835 +       case IPPROTO_TCP:
836 +               return tuple->src.u.tcp.port;
837 +       case IPPROTO_UDP:
838 +               return tuple->src.u.udp.port;
839 +       default:
840 +               return tuple->src.u.all;
841 +       }
842 +}
843 +
844 +/* MARK v0 target: overwrite the skb mark with the rule's value. */
845 +static unsigned int
846 +target_v0(struct sk_buff **pskb, const struct net_device *in,
847 +         const struct net_device *out, unsigned int hooknum,
848 +         const struct xt_target *target, const void *targinfo)
849 +{
850 +       const struct xt_mark_target_info *info = targinfo;
851 +       struct sk_buff *skb = *pskb;
852 +
853 +       skb->mark = info->mark;
854 +
855 +       return XT_CONTINUE;
856 +}
857 +
858 +/*
859 + * MARK v1 target: rewrite the skb mark as selected by markinfo->mode.
860 + * COPYXID trades xids with the conntrack entry (ICMP) or takes the
861 + * owning TCP socket's sk_xid.  Always returns XT_CONTINUE.
862 + */
863 +static unsigned int
864 +target_v1(struct sk_buff **pskb,
865 +         const struct net_device *in,
866 +         const struct net_device *out,
867 +         unsigned int hooknum,
868 +         const struct xt_target *target,
869 +         const void *targinfo)
870 +{
871 +       const struct xt_mark_target_info_v1 *markinfo = targinfo;
872 +       /* Start from the current mark so no in-band "unset" value is needed. */
873 +       u_int32_t mark = (*pskb)->mark;
874 +
875 +       switch (markinfo->mode) {
876 +       case XT_MARK_SET:
877 +               mark = markinfo->mark;
878 +               break;
879 +       case XT_MARK_AND:
880 +               mark &= markinfo->mark;
881 +               break;
882 +       case XT_MARK_OR:
883 +               mark |= markinfo->mark;
884 +               break;
885 +       case XT_MARK_COPYXID: {
886 +               extern struct inet_hashinfo tcp_hashinfo;
887 +               enum ip_conntrack_info ctinfo;
888 +               struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo);
889 +               struct nf_conntrack_tuple *tuple;
890 +               struct sock *connection_sk;
891 +               struct rtable *rt;
892 +               enum ip_conntrack_dir dir;
893 +               int dif;
894 +
895 +               if (!ct)
896 +                       break;
897 +               dir = CTINFO2DIR(ctinfo);
898 +               tuple = &ct->tuplehash[dir].tuple;
899 +               if (tuple->dst.protonum == IPPROTO_ICMP) {
900 +                       /* Store this packet's mark for the other direction. */
901 +                       ct->xid[!dir] = (*pskb)->mark;
902 +                       if (ct->xid[dir])
903 +                               mark = ct->xid[dir];
904 +                       break;
905 +               }
906 +               if (tuple->dst.protonum != IPPROTO_TCP)
907 +                       break;
908 +               connection_sk = (*pskb)->sk;
909 +               if (!connection_sk) {
910 +                       /* skb->dst may be NULL here (e.g. before routing). */
911 +                       rt = (struct rtable *)(*pskb)->dst;
912 +                       dif = rt ? rt->rt_iif : (in ? in->ifindex : 0);
913 +                       connection_sk = inet_lookup(&tcp_hashinfo,
914 +                               tuple->src.u3.ip, get_src_port(tuple),
915 +                               tuple->dst.u3.ip, get_dst_port(tuple), dif);
916 +               }
917 +               if (!connection_sk)
918 +                       break;
919 +               /* NOTE(review): confirm a TIME_WAIT entry cannot reach here. */
920 +               connection_sk->sk_peercred.gid =
921 +                       connection_sk->sk_peercred.uid = ct->xid[dir];
922 +               ct->xid[!dir] = connection_sk->sk_xid;
923 +               if (connection_sk->sk_xid != 0)
924 +                       mark = connection_sk->sk_xid;
925 +               /* Release only a socket obtained from the lookup above. */
926 +               if (connection_sk != (*pskb)->sk)
927 +                       sock_put(connection_sk);
928 +               break;
929 +       }
930 +       }
931 +
932 +       (*pskb)->mark = mark;
933 +       return XT_CONTINUE;
934 +}
935 +
936 +
937 +/* Validate a v0 rule: the mark must fit in 32 bits.  Returns 1 if OK. */
938 +static int
939 +checkentry_v0(const char *tablename, const void *entry,
940 +             const struct xt_target *target, void *targinfo,
941 +             unsigned int hook_mask)
942 +{
943 +       struct xt_mark_target_info *info = targinfo;
944 +
945 +       if (info->mark <= 0xffffffff)
946 +               return 1;
947 +
948 +       /* Reject marks wider than 32 bits. */
949 +       printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
950 +       return 0;
951 +}
952 +
953 +/* Validate a v1 rule: known mode and a mark that fits in 32 bits. */
954 +static int
955 +checkentry_v1(const char *tablename, const void *entry,
956 +             const struct xt_target *target, void *targinfo,
957 +             unsigned int hook_mask)
958 +{
959 +       struct xt_mark_target_info_v1 *info = targinfo;
960 +
961 +       switch (info->mode) {
962 +       case XT_MARK_SET:
963 +       case XT_MARK_AND:
964 +       case XT_MARK_OR:
965 +       case XT_MARK_COPYXID:
966 +               break;
967 +       default:
968 +               printk(KERN_WARNING "MARK: unknown mode %u\n", info->mode);
969 +               return 0;
970 +       }
971 +       if (info->mark <= 0xffffffff)
972 +               return 1;
973 +       printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n");
974 +       return 0;
975 +}
976 +
977 +#ifdef CONFIG_COMPAT
978 +struct compat_xt_mark_target_info_v1 {  /* compat layout of xt_mark_target_info_v1 */
979 +       compat_ulong_t  mark;           /* mark value, compat_ulong_t wide */
980 +       u_int8_t        mode;           /* one of the XT_MARK_* modes */
981 +       u_int8_t        __pad1;         /* explicit padding */
982 +       u_int16_t       __pad2;         /* explicit padding */
983 +};
984 +
985 +/* Convert a compat-layout v1 rule (src) into the native struct at dst. */
986 +static void compat_from_user_v1(void *dst, void *src)
987 +{
988 +       const struct compat_xt_mark_target_info_v1 *user = src;
989 +       struct xt_mark_target_info_v1 native = {
990 +               .mode = user->mode, .mark = user->mark,
991 +       };
992 +       memcpy(dst, &native, sizeof(native));
993 +}
994 +
995 +/* Write a native v1 rule to userspace in compat layout; 0 or -EFAULT. */
996 +static int compat_to_user_v1(void __user *dst, void *src)
997 +{
998 +       const struct xt_mark_target_info_v1 *native = src;
999 +       struct compat_xt_mark_target_info_v1 user = {
1000 +               .mode = native->mode, .mark = native->mark,
1001 +       };
1002 +       return copy_to_user(dst, &user, sizeof(user)) ? -EFAULT : 0;
1003 +}
1004 +#endif /* CONFIG_COMPAT */
1005 +
1006 +static struct xt_target xt_mark_target[] = {    /* every MARK revision registered below */
1007 +       {       /* IPv4, revision 0: handled by target_v0 */
1008 +               .name           = "MARK",
1009 +               .family         = AF_INET,
1010 +               .revision       = 0,
1011 +               .checkentry     = checkentry_v0,
1012 +               .target         = target_v0,
1013 +               .targetsize     = sizeof(struct xt_mark_target_info),
1014 +               .table          = "mangle",
1015 +               .me             = THIS_MODULE,
1016 +       },
1017 +       {       /* IPv4, revision 1: handled by target_v1, with compat conversion */
1018 +               .name           = "MARK",
1019 +               .family         = AF_INET,
1020 +               .revision       = 1,
1021 +               .checkentry     = checkentry_v1,
1022 +               .target         = target_v1,
1023 +               .targetsize     = sizeof(struct xt_mark_target_info_v1),
1024 +#ifdef CONFIG_COMPAT
1025 +               .compatsize     = sizeof(struct compat_xt_mark_target_info_v1),
1026 +               .compat_from_user = compat_from_user_v1,
1027 +               .compat_to_user = compat_to_user_v1,
1028 +#endif /* CONFIG_COMPAT */
1029 +               .table          = "mangle",
1030 +               .me             = THIS_MODULE,
1031 +       },
1032 +       {       /* IPv6, revision 0: handled by target_v0 */
1033 +               .name           = "MARK",
1034 +               .family         = AF_INET6,
1035 +               .revision       = 0,
1036 +               .checkentry     = checkentry_v0,
1037 +               .target         = target_v0,
1038 +               .targetsize     = sizeof(struct xt_mark_target_info),
1039 +               .table          = "mangle",
1040 +               .me             = THIS_MODULE,
1041 +       },
1042 +};
1043 +
1044 +static int __init xt_mark_init(void)    /* module_init hook: register xt_mark_target[] */
1045 +{
1046 +       return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
1047 +}
1048 +
1049 +static void __exit xt_mark_fini(void)   /* module_exit hook: unregister xt_mark_target[] */
1050 +{
1051 +       xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target));
1052 +}
1053 +
1054 +module_init(xt_mark_init);
1055 +module_exit(xt_mark_fini);