PlanetLab VNET packet tagging for linux-2.6.22 (tree -510 -> -520).

What the hunks below do:
  - sk_buff: skb_tag is an alias for skb->mark; freshly allocated skbs get the
    allocating task's nid (0 in interrupt context) and copies inherit the tag.
  - sockets: SO_SETXID sets sk_xid/sk_nid; raw and packet sockets that belong to
    a network context only receive packets whose tag matches their nid (tag 1
    is accepted by every context socket).
  - capabilities: NXC_RAW_SOCKET and NXC_RAW_SEND are checked through
    nx_capable() next to CAP_NET_RAW in the places patched below.
  - netfilter: MARK gains the COPYXID mode, a new SETXID target writes skb_tag,
    and struct nf_conn records one xid per direction.
  - net/core/dev.c: the per-CPU sknid_elevator lets a packet tap ask for the
    packet to be re-delivered to the taps once netfilter has tagged it.

NOTE(review): this copy was rebuilt from a whitespace-collapsed paste; the
indentation of context lines is reconstructed and most #include lines have lost
their header name. Re-check both against the pristine trees before applying.

diff -Nurb linux-2.6.22-510/include/linux/netfilter/xt_MARK.h linux-2.6.22-520/include/linux/netfilter/xt_MARK.h
--- linux-2.6.22-510/include/linux/netfilter/xt_MARK.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/include/linux/netfilter/xt_MARK.h	2008-06-06 17:07:56.000000000 -0400
@@ -11,6 +11,7 @@
 	XT_MARK_SET=0,
 	XT_MARK_AND,
 	XT_MARK_OR,
+	XT_MARK_COPYXID,	/* take the mark from the connection's / socket's context id */
 };
 
 struct xt_mark_target_info_v1 {
diff -Nurb linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h
--- linux-2.6.22-510/include/linux/netfilter/xt_SETXID.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-520/include/linux/netfilter/xt_SETXID.h	2008-06-06 17:07:56.000000000 -0400
@@ -0,0 +1,14 @@
+#ifndef _XT_SETXID_H_target
+#define _XT_SETXID_H_target
+
+/* Version 1 */
+enum {
+	XT_SET_PACKET_XID=0	/* write ->mark into the packet's skb_tag */
+};
+
+struct xt_setxid_target_info_v1 {
+	unsigned long mark;	/* value stored in skb_tag */
+	u_int8_t mode;		/* one of the XT_SET_* modes above */
+};
+
+#endif /*_XT_SETXID_H_target*/
diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h
--- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_MARK.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_MARK.h	2008-06-06 17:07:56.000000000 -0400
@@ -12,6 +12,7 @@
 #define IPT_MARK_SET	XT_MARK_SET
 #define IPT_MARK_AND	XT_MARK_AND
 #define IPT_MARK_OR	XT_MARK_OR
+#define IPT_MARK_COPYXID	XT_MARK_COPYXID
 
 #define ipt_mark_target_info_v1 xt_mark_target_info_v1
 
diff -Nurb linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h
--- linux-2.6.22-510/include/linux/netfilter_ipv4/ipt_SETXID.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-520/include/linux/netfilter_ipv4/ipt_SETXID.h	2008-06-06 17:07:56.000000000 -0400
@@ -0,0 +1,13 @@
+#ifndef _IPT_SETXID_H_target
+#define _IPT_SETXID_H_target
+
+/* Backwards compatibility for old userspace */
+
+#include <linux/netfilter/xt_SETXID.h>
+
+/* Version 1 */
+#define IPT_SET_PACKET_XID	XT_SET_PACKET_XID
+
+#define ipt_setxid_target_info_v1 xt_setxid_target_info_v1
+
+#endif /*_IPT_SETXID_H_target*/
diff -Nurb linux-2.6.22-510/include/linux/skbuff.h linux-2.6.22-520/include/linux/skbuff.h
--- linux-2.6.22-510/include/linux/skbuff.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/include/linux/skbuff.h	2008-06-06 17:07:56.000000000 -0400
@@ -302,6 +302,7 @@
 #endif
 
 	__u32			mark;
+#define skb_tag mark	/* the VNET tag shares storage with the netfilter mark */
 
 	sk_buff_data_t		transport_header;
 	sk_buff_data_t		network_header;
diff -Nurb linux-2.6.22-510/include/linux/socket.h linux-2.6.22-520/include/linux/socket.h
--- linux-2.6.22-510/include/linux/socket.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/include/linux/socket.h	2008-06-06 17:07:56.000000000 -0400
@@ -288,6 +288,8 @@
 #define SOL_TIPC	271
 #define SOL_RXRPC	272
 
+#define SO_SETXID	SO_PEERCRED	/* reuses the SO_PEERCRED option number; handled in net/core/sock.c */
+
 /* IPX options */
 #define IPX_TYPE	1
 
diff -Nurb linux-2.6.22-510/include/linux/vserver/network.h linux-2.6.22-520/include/linux/vserver/network.h
--- linux-2.6.22-510/include/linux/vserver/network.h	2008-06-06 17:07:48.000000000 -0400
+++ linux-2.6.22-520/include/linux/vserver/network.h	2008-06-06 17:07:56.000000000 -0400
@@ -47,6 +47,8 @@
 #define NXC_TUN_CREATE		0x00000001
 
 #define NXC_RAW_ICMP		0x00000100
+#define NXC_RAW_SOCKET		0x00000200
+#define NXC_RAW_SEND		0x00000400
 
 
 /* address types */
diff -Nurb linux-2.6.22-510/include/net/netfilter/nf_conntrack.h linux-2.6.22-520/include/net/netfilter/nf_conntrack.h
--- linux-2.6.22-510/include/net/netfilter/nf_conntrack.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/include/net/netfilter/nf_conntrack.h	2008-06-06 17:07:56.000000000 -0400
@@ -131,6 +131,9 @@
 	/* Storage reserved for other modules: */
 	union nf_conntrack_proto proto;
 
+	/* PLANETLAB. VNET-specific: context id per direction, -1 until known */
+	int xid[IP_CT_DIR_MAX];
+
 	/* features dynamically at the end: helper, nat (both optional) */
 	char data[0];
 };
diff -Nurb linux-2.6.22-510/include/net/raw.h linux-2.6.22-520/include/net/raw.h
--- linux-2.6.22-510/include/net/raw.h	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/include/net/raw.h	2008-06-06 17:07:56.000000000 -0400
@@ -36,7 +36,7 @@
 
 extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
 				    __be32 raddr, __be32 laddr,
-				    int dif);
+				    int dif, int tag);
 
 extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
 
diff -Nurb linux-2.6.22-510/net/core/dev.c linux-2.6.22-520/net/core/dev.c
--- linux-2.6.22-510/net/core/dev.c	2008-06-06 17:07:48.000000000 -0400
+++ linux-2.6.22-520/net/core/dev.c	2008-06-06 17:07:56.000000000 -0400
@@ -1803,6 +1803,7 @@
  * the ingress scheduler, you just cant add policies on ingress.
  *
  */
+
 static int ing_filter(struct sk_buff *skb)
 {
 	struct Qdisc *q;
@@ -1832,13 +1833,20 @@
 }
 #endif
 
+/* Per-CPU tag hand-off between the packet taps and netfilter. The code already
+ * assumes that packet handlers run sequentially on the same CPU. -Sapan */
+DEFINE_PER_CPU(int, sknid_elevator);
+
 int netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	struct net_device *orig_dev;
 	int ret = NET_RX_DROP;
+	int *cur_elevator=&__get_cpu_var(sknid_elevator);
 	__be16 type;
 
+	*cur_elevator = 0;
+
 	/* if we've gotten here through NAPI, check netpoll */
 	if (skb->dev->poll && netpoll_rx(skb))
 		return NET_RX_DROP;
@@ -1873,8 +1881,9 @@
 
 	list_for_each_entry_rcu(ptype, &ptype_all, list) {
 		if (!ptype->dev || ptype->dev == skb->dev) {
-			if (pt_prev)
+			if (pt_prev) {
 				ret = deliver_skb(skb, pt_prev, orig_dev);
+			}
 			pt_prev = ptype;
 		}
 	}
@@ -1912,8 +1921,22 @@
 		}
 	}
 
+	/* We don't want the packet handlers to throw the packet away
+	 * if we want the taps to treat it again - Sapan */
+	if (*cur_elevator) {
+		atomic_inc(&skb->users);
+	}
+
 	if (pt_prev) {
 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+		if (*cur_elevator > 0) {
+			skb->skb_tag = *cur_elevator;
+			list_for_each_entry_rcu(ptype, &ptype_all, list) {
+				if (!ptype->dev || ptype->dev == skb->dev) {
+					ret = deliver_skb(skb, ptype, orig_dev);
+				}
+			}
+		}
 	} else {
 		kfree_skb(skb);
 		/* Jamal, now you will not able to escape explaining
@@ -1922,6 +1945,13 @@
 		ret = NET_RX_DROP;
 	}
 
+	if (*cur_elevator) {
+		/* Drop the extra reference taken before the handler ran */
+		kfree_skb(skb);
+	}
+
+	*cur_elevator=0;
+
 out:
 	rcu_read_unlock();
 	return ret;
@@ -3780,6 +3810,7 @@
 EXPORT_SYMBOL(net_enable_timestamp);
 EXPORT_SYMBOL(net_disable_timestamp);
 EXPORT_SYMBOL(dev_get_flags);
+EXPORT_PER_CPU_SYMBOL(sknid_elevator);
 
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 EXPORT_SYMBOL(br_handle_frame_hook);
diff -Nurb linux-2.6.22-510/net/core/skbuff.c linux-2.6.22-520/net/core/skbuff.c
--- linux-2.6.22-510/net/core/skbuff.c	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/net/core/skbuff.c	2008-06-06 17:07:56.000000000 -0400
@@ -56,6 +56,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -174,6 +175,7 @@
 	skb->data = data;
 	skb_reset_tail_pointer(skb);
 	skb->end = skb->tail + size;
+	if (!in_interrupt()) skb->skb_tag = nx_current_nid(); else skb->skb_tag = 0;
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
@@ -443,6 +445,8 @@
 	C(tail);
 	C(end);
 
+	/* Sapan: Cloned skbs aren't owned by anyone. Let the cloner decide who it belongs to. */
+
 	atomic_inc(&(skb_shinfo(skb)->dataref));
 	skb->cloned = 1;
 
@@ -492,6 +496,7 @@
 	new->tc_index	= old->tc_index;
 #endif
 	skb_copy_secmark(new, old);
+	new->skb_tag	= old->skb_tag;
 	atomic_set(&new->users, 1);
 	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
 	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
diff -Nurb linux-2.6.22-510/net/core/sock.c linux-2.6.22-520/net/core/sock.c
--- linux-2.6.22-510/net/core/sock.c	2008-06-06 17:07:48.000000000 -0400
+++ linux-2.6.22-520/net/core/sock.c	2008-06-06 17:07:56.000000000 -0400
@@ -444,6 +444,19 @@
 		}
 		goto set_sndbuf;
 
+	case SO_SETXID:	/* set sk_xid and sk_nid; -EPERM when current_vx_info() is non-NULL */
+		if (current_vx_info()) {
+			ret = -EPERM;
+			break;
+		}
+		if (val < 0 || val > MAX_S_CONTEXT) {
+			ret = -EINVAL;
+			break;
+		}
+		sk->sk_xid = val;
+		sk->sk_nid = val;
+		break;
+
 	case SO_RCVBUF:
 		/* Don't error on this BSD doesn't and if you think
 		   about it this is right. Otherwise apps have to
@@ -573,7 +586,7 @@
 		char devname[IFNAMSIZ];
 
 		/* Sorry... */
-		if (!capable(CAP_NET_RAW)) {
+		if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
 			ret = -EPERM;
 			break;
 		}
diff -Nurb linux-2.6.22-510/net/ipv4/af_inet.c linux-2.6.22-520/net/ipv4/af_inet.c
--- linux-2.6.22-510/net/ipv4/af_inet.c	2008-06-06 17:07:48.000000000 -0400
+++ linux-2.6.22-520/net/ipv4/af_inet.c	2008-06-06 17:07:56.000000000 -0400
@@ -178,6 +178,8 @@
 			return -EAGAIN;
 		}
 		inet->sport = htons(inet->num);
+		sk->sk_xid = vx_current_xid();
+		if (!in_interrupt()) sk->sk_nid = nx_current_nid(); else sk->sk_nid=0;
 	}
 	release_sock(sk);
 	return 0;
@@ -312,6 +314,9 @@
 	if ((protocol == IPPROTO_ICMP) &&
 		nx_capable(answer->capability, NXC_RAW_ICMP))
 		goto override;
+	if (sock->type == SOCK_RAW &&
+		nx_capable(answer->capability, NXC_RAW_SOCKET))
+		goto override;
 	if (answer->capability > 0 && !capable(answer->capability))
 		goto out_rcu_unlock;
 override:
diff -Nurb linux-2.6.22-510/net/ipv4/icmp.c linux-2.6.22-520/net/ipv4/icmp.c
--- linux-2.6.22-510/net/ipv4/icmp.c	2008-06-06 17:07:55.000000000 -0400
+++ linux-2.6.22-520/net/ipv4/icmp.c	2008-06-06 17:07:56.000000000 -0400
@@ -709,7 +709,7 @@
 	if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
 		while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
 						 iph->saddr,
-						 skb->dev->ifindex)) != NULL) {
+						 skb->dev->ifindex, skb->skb_tag)) != NULL) {
 			raw_err(raw_sk, skb, info);
 			raw_sk = sk_next(raw_sk);
 			iph = (struct iphdr *)skb->data;
diff -Nurb linux-2.6.22-510/net/ipv4/ip_options.c linux-2.6.22-520/net/ipv4/ip_options.c
--- linux-2.6.22-510/net/ipv4/ip_options.c	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/net/ipv4/ip_options.c	2008-06-06 17:07:56.000000000 -0400
@@ -409,7 +409,7 @@
 					optptr[2] += 8;
 					break;
 				      default:
-					if (!skb && !capable(CAP_NET_RAW)) {
+					if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
 						pp_ptr = optptr + 3;
 						goto error;
 					}
@@ -445,7 +445,7 @@
 				opt->router_alert = optptr - iph;
 			break;
 		      case IPOPT_CIPSO:
-			if ((!skb && !capable(CAP_NET_RAW)) || opt->cipso) {
+			if ((!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) || opt->cipso) {
 				pp_ptr = optptr;
 				goto error;
 			}
@@ -458,7 +458,7 @@
 		      case IPOPT_SEC:
 		      case IPOPT_SID:
 		      default:
-			if (!skb && !capable(CAP_NET_RAW)) {
+			if (!skb && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) {
 				pp_ptr = optptr;
 				goto error;
 			}
diff -Nurb linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c
--- linux-2.6.22-510/net/ipv4/netfilter/ipt_LOG.c	2008-06-06 17:07:43.000000000 -0400
+++ linux-2.6.22-520/net/ipv4/netfilter/ipt_LOG.c	2008-06-06 17:07:56.000000000 -0400
@@ -49,6 +49,8 @@
 	else
 		logflags = NF_LOG_MASK;
 
+	printk("TAG=%d ", skb->skb_tag);
+
 	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
 	if (ih == NULL) {
 		printk("TRUNCATED");
diff -Nurb linux-2.6.22-510/net/ipv4/raw.c linux-2.6.22-520/net/ipv4/raw.c
--- linux-2.6.22-510/net/ipv4/raw.c	2008-06-06 17:07:48.000000000 -0400
+++ linux-2.6.22-520/net/ipv4/raw.c	2008-06-06 17:07:56.000000000 -0400
@@ -103,7 +103,7 @@
 
 struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
 			     __be32 raddr, __be32 laddr,
-			     int dif)
+			     int dif, int tag)
 {
 	struct hlist_node *node;
 
@@ -112,6 +112,7 @@
 
 		if (inet->num == num					&&
 		    !(inet->daddr && inet->daddr != raddr)		&&
+		    (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag)	&&
 		    v4_sock_addr_match(sk->sk_nx_info, inet, laddr)	&&
 		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
 			goto found; /* gotcha */
@@ -161,7 +162,7 @@
 		goto out;
 	sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
 			     iph->saddr, iph->daddr,
-			     skb->dev->ifindex);
+			     skb->dev->ifindex, skb->skb_tag);
 
 	while (sk) {
 		delivered = 1;
@@ -174,7 +175,7 @@
 		}
 		sk = __raw_v4_lookup(sk_next(sk), iph->protocol,
 				     iph->saddr, iph->daddr,
-				     skb->dev->ifindex);
+				     skb->dev->ifindex, skb->skb_tag);
 	}
 out:
 	read_unlock(&raw_v4_lock);
@@ -315,7 +316,7 @@
 	}
 
 	err = -EPERM;
-	if (!nx_check(0, VS_ADMIN) && !capable(CAP_NET_RAW) &&
+	if (!nx_check(0, VS_ADMIN) && !nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET) &&
 		sk->sk_nx_info &&
 		!v4_addr_in_nx_info(sk->sk_nx_info, iph->saddr, NXA_MASK_BIND))
 		goto error_free;
diff -Nurb linux-2.6.22-510/net/netfilter/Kconfig linux-2.6.22-520/net/netfilter/Kconfig
--- linux-2.6.22-510/net/netfilter/Kconfig	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/net/netfilter/Kconfig	2008-06-06 17:07:56.000000000 -0400
@@ -389,6 +389,13 @@
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_SETXID
+	tristate '"SETXID" target support'
+	depends on NETFILTER_XTABLES
+	help
+	  This option adds a `SETXID' target, which allows you to set the
+	  xid tag (skb_tag) carried by a packet.
+
 config NETFILTER_XT_MATCH_COMMENT
 	tristate  '"comment" match support'
 	depends on NETFILTER_XTABLES
diff -Nurb linux-2.6.22-510/net/netfilter/Makefile linux-2.6.22-520/net/netfilter/Makefile
--- linux-2.6.22-510/net/netfilter/Makefile	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/net/netfilter/Makefile	2008-06-06 17:07:56.000000000 -0400
@@ -37,6 +37,7 @@
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
 # targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
diff -Nurb linux-2.6.22-510/net/netfilter/nf_conntrack_core.c linux-2.6.22-520/net/netfilter/nf_conntrack_core.c
--- linux-2.6.22-510/net/netfilter/nf_conntrack_core.c	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/net/netfilter/nf_conntrack_core.c	2008-06-06 17:07:56.000000000 -0400
@@ -726,6 +726,8 @@
 
 	/* Overload tuple linked list to put us in unconfirmed list. */
 	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
+	conntrack->xid[IP_CT_DIR_ORIGINAL] = -1;
+	conntrack->xid[IP_CT_DIR_REPLY] = -1;
 
 	write_unlock_bh(&nf_conntrack_lock);
 
diff -Nurb linux-2.6.22-510/net/netfilter/xt_MARK.c linux-2.6.22-520/net/netfilter/xt_MARK.c
--- linux-2.6.22-510/net/netfilter/xt_MARK.c	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/net/netfilter/xt_MARK.c	2008-06-07 17:55:26.000000000 -0400
@@ -5,13 +5,18 @@
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
+ *
  */
 
 #include
+#include
 #include
 #include
 #include
+#include
+#include
+#include
 
 #include
 #include
 
@@ -21,6 +26,50 @@
 MODULE_ALIAS("ipt_MARK");
 MODULE_ALIAS("ip6t_MARK");
 
+/* Destination port of a conntrack tuple, or the per-protocol id used in its place. */
+static inline u_int16_t
+get_dst_port(struct nf_conntrack_tuple *tuple)
+{
+	switch (tuple->dst.protonum) {
+	case IPPROTO_GRE:
+		/* XXX Truncate 32-bit GRE key to 16 bits */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)
+		return tuple->dst.u.gre.key;
+#else
+		return htons(ntohl(tuple->dst.u.gre.key));
+#endif
+	case IPPROTO_ICMP:
+		/* Bind on ICMP echo ID */
+		return tuple->src.u.icmp.id;
+	case IPPROTO_TCP:
+		return tuple->dst.u.tcp.port;
+	case IPPROTO_UDP:
+		return tuple->dst.u.udp.port;
+	default:
+		return tuple->dst.u.all;
+	}
+}
+
+/* Source port of a conntrack tuple, or the per-protocol id used in its place. */
+static inline u_int16_t
+get_src_port(struct nf_conntrack_tuple *tuple)
+{
+	switch (tuple->dst.protonum) {
+	case IPPROTO_GRE:
+		/* XXX Truncate 32-bit GRE key to 16 bits. NOTE(review): no >= 2.6.11 branch here, unlike get_dst_port() - confirm */
+		return htons(ntohl(tuple->src.u.gre.key));
+	case IPPROTO_ICMP:
+		/* Bind on ICMP echo ID */
+		return tuple->src.u.icmp.id;
+	case IPPROTO_TCP:
+		return tuple->src.u.tcp.port;
+	case IPPROTO_UDP:
+		return tuple->src.u.udp.port;
+	default:
+		return tuple->src.u.all;
+	}
+}
+
 static unsigned int
 target_v0(struct sk_buff **pskb,
 	  const struct net_device *in,
@@ -35,6 +84,9 @@
 	return XT_CONTINUE;
 }
 
+/* Per-CPU tag hand-off, defined in net/core/dev.c. */
+DECLARE_PER_CPU(int, sknid_elevator);
+
 static unsigned int
 target_v1(struct sk_buff **pskb,
 	  const struct net_device *in,
@@ -44,7 +96,20 @@
 	  const void *targinfo)
 {
 	const struct xt_mark_target_info_v1 *markinfo = targinfo;
-	int mark = 0;
+	enum ip_conntrack_info ctinfo;
+	struct sock *connection_sk;
+	int dif;
+	struct nf_conn *ct;
+	extern struct inet_hashinfo tcp_hashinfo;
+	enum ip_conntrack_dir dir;
+	int *curtag;
+	u_int32_t src_ip;
+	u_int32_t dst_ip;
+	u_int16_t proto, src_port;
+	u_int32_t ip;
+	u_int16_t port;
+
+	int mark = -1;	/* -1: leave the packet's mark untouched */
 
 	switch (markinfo->mode) {
 	case XT_MARK_SET:
@@ -58,13 +123,74 @@
 	case XT_MARK_OR:
 		mark = (*pskb)->mark | markinfo->mark;
 		break;
+
+	case XT_MARK_COPYXID:
+		/* Mark from the connection's xid (UDP/ICMP) or the owning socket's nid (TCP). */
+		ct = nf_ct_get((*pskb), &ctinfo);
+		if (!ct)
+			break;
+
+		dir = CTINFO2DIR(ctinfo);
+		src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
+		dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
+		src_port = get_src_port(&ct->tuplehash[dir].tuple);
+		proto = ct->tuplehash[dir].tuple.dst.protonum;
+		/* skb->dst may not be set yet (rule run before routing): fall back to the input device. */
+		dif = (*pskb)->dst ? ((struct rtable *)(*pskb)->dst)->rt_iif : (in ? in->ifindex : 0);
+		ip = ct->tuplehash[dir].tuple.dst.u3.ip;
+		port = get_dst_port(&ct->tuplehash[dir].tuple);
+
+		if (proto == IPPROTO_ICMP || proto == IPPROTO_UDP) {
+			if ((*pskb)->mark>0) /* The packet is marked, it's going out */
+			{
+				/* Remember the mark as the connection's xid so that */
+				/* later packets of this connection get the same mark. */
+
+				ct->xid[0]=(*pskb)->mark;
 	}
 
+			if (ct->xid[0] > 0) {
+				mark = ct->xid[0];
+			}
+
+		}
+		else if (proto == IPPROTO_TCP) {
+			if ((*pskb)->sk) {
+				connection_sk = (*pskb)->sk;
+				sock_hold(connection_sk);
+			}
+			else
+				connection_sk = inet_lookup_established(&tcp_hashinfo, src_ip, src_port, ip, port, dif);
+
+
+			if (connection_sk) {
+				if (connection_sk->sk_state == TCP_TIME_WAIT) {
+					inet_twsk_put(inet_twsk(connection_sk));
+					break;
+				}
+				connection_sk->sk_peercred.gid = connection_sk->sk_peercred.uid = ct->xid[dir];
+				ct->xid[!dir]=connection_sk->sk_nid;
+				if (connection_sk->sk_nid != 0)
+					mark = connection_sk->sk_nid;
+				sock_put(connection_sk);
+			}
+			else
+				mark = -1 ;
+		}
+		break;
+	}
+	if (mark != -1) {
 	(*pskb)->mark = mark;
+	}
+
+	curtag=&__get_cpu_var(sknid_elevator);
+	if (mark > 0 && *curtag==-2)
+	{
+		*curtag = mark;
+	}
 	return XT_CONTINUE;
 }
 
-
 static int
 checkentry_v0(const char *tablename,
 	      const void *entry,
@@ -92,7 +218,8 @@
 
 	if (markinfo->mode != XT_MARK_SET
 	    && markinfo->mode != XT_MARK_AND
-	    && markinfo->mode != XT_MARK_OR) {
+	    && markinfo->mode != XT_MARK_OR
+	    && markinfo->mode != XT_MARK_COPYXID) {
 		printk(KERN_WARNING "MARK: unknown mode %u\n",
 		       markinfo->mode);
 		return 0;
diff -Nurb linux-2.6.22-510/net/netfilter/xt_SETXID.c linux-2.6.22-520/net/netfilter/xt_SETXID.c
--- linux-2.6.22-510/net/netfilter/xt_SETXID.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.22-520/net/netfilter/xt_SETXID.c	2008-06-06 17:07:56.000000000 -0400
@@ -0,0 +1,80 @@
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("");
+MODULE_DESCRIPTION("");
+MODULE_ALIAS("ipt_SETXID");
+
+/* Target hook: store the rule's mark value in the packet's skb_tag, then continue. */
+static unsigned int
+target_v1(struct sk_buff **pskb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  unsigned int hooknum,
+	  const struct xt_target *target,
+	  const void *targinfo)
+{
+	const struct xt_setxid_target_info_v1 *setxidinfo = targinfo;
+
+	switch (setxidinfo->mode) {
+	case XT_SET_PACKET_XID:
+		(*pskb)->skb_tag = setxidinfo->mark;
+		break;
+	}
+	return XT_CONTINUE;
+}
+
+/* Rule check: only XT_SET_PACKET_XID is a known mode; returns 1 if valid, 0 otherwise. */
+static int
+checkentry_v1(const char *tablename,
+	      const void *entry,
+	      const struct xt_target *target,
+	      void *targinfo,
+	      unsigned int hook_mask)
+{
+	struct xt_setxid_target_info_v1 *setxidinfo = targinfo;
+
+	if (setxidinfo->mode != XT_SET_PACKET_XID) {
+		printk(KERN_WARNING "SETXID: unknown mode %u\n",
+		       setxidinfo->mode);
+		return 0;
+	}
+
+	return 1;
+}
+
+static struct xt_target xt_setxid_target[] = {
+	{
+		.name		= "SETXID",
+		.family		= AF_INET,
+		.revision	= 1,
+		.checkentry	= checkentry_v1,
+		.target		= target_v1,
+		.targetsize	= sizeof(struct xt_setxid_target_info_v1),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	}
+};
+
+static int __init init(void)
+{
+	int err;
+
+	err = xt_register_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target));
+	return err;
+}
+
+static void __exit fini(void)
+{
+	xt_unregister_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target));
+}
+
+module_init(init);
+module_exit(fini);
diff -Nurb linux-2.6.22-510/net/packet/af_packet.c linux-2.6.22-520/net/packet/af_packet.c
--- linux-2.6.22-510/net/packet/af_packet.c	2007-07-08 19:32:17.000000000 -0400
+++ linux-2.6.22-520/net/packet/af_packet.c	2008-06-07 18:30:41.000000000 -0400
@@ -78,6 +78,7 @@
 #include
 #include
 #include
+#include
 
 #ifdef CONFIG_INET
 #include
@@ -246,10 +247,13 @@
 
 static const struct proto_ops packet_ops_spkt;
 
+DECLARE_PER_CPU(int, sknid_elevator);
 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct sock *sk;
 	struct sockaddr_pkt *spkt;
+	int tag = skb->skb_tag;
+	int *elevator=&__get_cpu_var(sknid_elevator);
 
 	/*
 	 *	When we registered the protocol we saved the socket in the data
@@ -269,6 +273,22 @@
 	 *	so that this procedure is noop.
 	 */
 
+	/*
+	 *	Tag filter: a socket that has an nx_info only takes packets tagged
+	 *	with its own nid (tag 1 is always accepted). A refused packet sets
+	 *	the per-CPU elevator to -2 so that netif_receive_skb() can hand the
+	 *	packet to the taps again once netfilter has tagged it.
+	 */
+
+	if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
+		*elevator=-2;
+		goto out;
+	}
+	else if (!sk->sk_nx_info && *elevator) {
+		/* Root has already seen this packet */
+		goto out;
+	}
+
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto out;
 
@@ -324,6 +344,9 @@
 	__be16 proto=0;
 	int err;
 
+	if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
+		return -EPERM;
+
 	/*
 	 *	Get and verify the address.
 	 */
@@ -420,6 +443,17 @@
 				      unsigned int res)
 {
 	struct sk_filter *filter;
+	int tag = skb->skb_tag;
+	int *elevator=&__get_cpu_var(sknid_elevator);
+
+	if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
+		*elevator=-2;
+		return 0;
+	}
+	else if (!sk->sk_nx_info && *elevator) {
+		/* Root has already seen this packet */
+		return 0;
+	}
 
 	rcu_read_lock_bh();
 	filter = rcu_dereference(sk->sk_filter);
@@ -711,6 +745,9 @@
 	unsigned char *addr;
 	int ifindex, err, reserve = 0;
 
+	if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
+		return -EPERM;
+
 	/*
 	 *	Get and verify the address.
 	 */
@@ -984,8 +1021,9 @@
 	__be16 proto = (__force __be16)protocol; /* weird, but documented */
 	int err;
 
-	if (!capable(CAP_NET_RAW))
+	if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET))
 		return -EPERM;
+
 	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
 	    sock->type != SOCK_PACKET)
 		return -ESOCKTNOSUPPORT;