diff -Nurb linux-2.6.27-521/include/linux/netfilter/xt_MARK.h linux-2.6.27-522/include/linux/netfilter/xt_MARK.h
--- linux-2.6.27-521/include/linux/netfilter/xt_MARK.h	2008-10-09 18:13:53.000000000 -0400
+++ linux-2.6.27-522/include/linux/netfilter/xt_MARK.h	2009-12-10 12:09:35.000000000 -0500
@@ -11,6 +11,7 @@
 	XT_MARK_SET=0,
 	XT_MARK_AND,
 	XT_MARK_OR,
+	XT_MARK_COPYXID,
 };
 
 struct xt_mark_target_info_v1 {
diff -Nurb linux-2.6.27-521/include/linux/netfilter/xt_SETXID.h linux-2.6.27-522/include/linux/netfilter/xt_SETXID.h
--- linux-2.6.27-521/include/linux/netfilter/xt_SETXID.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.27-522/include/linux/netfilter/xt_SETXID.h	2009-12-10 12:09:35.000000000 -0500
@@ -0,0 +1,20 @@
+#ifndef _XT_SETXID_H_target
+#define _XT_SETXID_H_target
+
+/* Version 1: values for xt_setxid_target_info_v1.mode */
+enum {
+	/* Copy xt_setxid_target_info_v1.mark into the packet's skb_tag */
+	XT_SET_PACKET_XID=0
+};
+
+/*
+ * Per-rule data of the SETXID target, revision 1.
+ * NOTE(review): 'unsigned long' has a different size for 32-bit userspace
+ * on a 64-bit kernel - confirm how compat is handled before relying on it.
+ */
+struct xt_setxid_target_info_v1 {
+	unsigned long mark;	/* value stored by XT_SET_PACKET_XID */
+	u_int8_t mode;		/* one of the XT_SET_* values above */
+};
+
+#endif /*_XT_SETXID_H_target*/
diff -Nurb linux-2.6.27-521/include/linux/netfilter_ipv4/ipt_MARK.h linux-2.6.27-522/include/linux/netfilter_ipv4/ipt_MARK.h
--- linux-2.6.27-521/include/linux/netfilter_ipv4/ipt_MARK.h	2008-10-09 18:13:53.000000000 -0400
+++ linux-2.6.27-522/include/linux/netfilter_ipv4/ipt_MARK.h	2009-12-10 12:09:35.000000000 -0500
@@ -12,6 +12,7 @@
 #define IPT_MARK_SET	XT_MARK_SET
 #define IPT_MARK_AND	XT_MARK_AND
 #define IPT_MARK_OR	XT_MARK_OR
+#define IPT_MARK_COPYXID	XT_MARK_COPYXID
 
 #define ipt_mark_target_info_v1 xt_mark_target_info_v1
 
diff -Nurb linux-2.6.27-521/include/linux/netfilter_ipv4/ipt_SETXID.h linux-2.6.27-522/include/linux/netfilter_ipv4/ipt_SETXID.h
--- linux-2.6.27-521/include/linux/netfilter_ipv4/ipt_SETXID.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.27-522/include/linux/netfilter_ipv4/ipt_SETXID.h	2009-12-10 12:09:35.000000000 -0500
@@ -0,0 +1,13 @@
+#ifndef _IPT_SETXID_H_target
+#define _IPT_SETXID_H_target
+
+/* Backwards compatibility for old userspace */
+
+#include <linux/netfilter/xt_SETXID.h>
+
+/* Version 1 */
+#define IPT_SET_PACKET_XID	XT_SET_PACKET_XID
+
+#define ipt_setxid_target_info_v1 xt_setxid_target_info_v1
+
+#endif /*_IPT_SETXID_H_target*/
diff -Nurb linux-2.6.27-521/include/net/netfilter/nf_conntrack.h linux-2.6.27-522/include/net/netfilter/nf_conntrack.h
--- linux-2.6.27-521/include/net/netfilter/nf_conntrack.h	2008-10-09 18:13:53.000000000 -0400
+++ linux-2.6.27-522/include/net/netfilter/nf_conntrack.h	2009-12-10 12:09:35.000000000 -0500
@@ -121,6 +121,9 @@
 	/* Storage reserved for other modules: */
 	union nf_conntrack_proto proto;
 
+	/* PLANETLAB. VNET-specific: xid per direction, -1 when unset */
+	int xid[IP_CT_DIR_MAX];
+
 	/* Extensions */
 	struct nf_ct_ext *ext;
 
diff -Nurb linux-2.6.27-521/net/netfilter/Kconfig linux-2.6.27-522/net/netfilter/Kconfig
--- linux-2.6.27-521/net/netfilter/Kconfig	2008-10-09 18:13:53.000000000 -0400
+++ linux-2.6.27-522/net/netfilter/Kconfig	2009-12-10 12:09:35.000000000 -0500
@@ -477,6 +477,13 @@
 	  This option adds a "TCPOPTSTRIP" target, which allows you to strip
 	  TCP options from TCP packets.
 
+config NETFILTER_XT_TARGET_SETXID
+	tristate '"SETXID" target support'
+	depends on NETFILTER_XTABLES
+	help
+	  This option adds a `SETXID' target, which allows you to alter the
+	  xid of a socket.
+
 config NETFILTER_XT_MATCH_COMMENT
 	tristate '"comment" match support'
 	depends on NETFILTER_XTABLES
diff -Nurb linux-2.6.27-521/net/netfilter/Makefile linux-2.6.27-522/net/netfilter/Makefile
--- linux-2.6.27-521/net/netfilter/Makefile	2008-10-09 18:13:53.000000000 -0400
+++ linux-2.6.27-522/net/netfilter/Makefile	2009-12-10 12:09:35.000000000 -0500
@@ -38,6 +38,7 @@
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
 # targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_SETXID) += xt_SETXID.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
diff -Nurb linux-2.6.27-521/net/netfilter/nf_conntrack_core.c linux-2.6.27-522/net/netfilter/nf_conntrack_core.c
--- linux-2.6.27-521/net/netfilter/nf_conntrack_core.c	2008-10-09 18:13:53.000000000 -0400
+++ linux-2.6.27-522/net/netfilter/nf_conntrack_core.c	2009-12-10 12:09:35.000000000 -0500
@@ -595,6 +595,10 @@
 	/* Overload tuple linked list to put us in unconfirmed list. */
 	hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed);
 
+	/* PLANETLAB: a new conntrack starts with no xid in either direction */
+	ct->xid[IP_CT_DIR_ORIGINAL] = -1;
+	ct->xid[IP_CT_DIR_REPLY] = -1;
+
 	spin_unlock_bh(&nf_conntrack_lock);
 
 	if (exp) {
NOTE(review): the header names of the #include lines added to xt_MARK.c were
lost in the copy this patch was restored from.  They were re-derived from the
symbols the code uses - confirm against the original patch.
diff -Nurb linux-2.6.27-521/net/netfilter/xt_MARK.c linux-2.6.27-522/net/netfilter/xt_MARK.c
--- linux-2.6.27-521/net/netfilter/xt_MARK.c	2008-10-09 18:13:53.000000000 -0400
+++ linux-2.6.27-522/net/netfilter/xt_MARK.c	2009-12-16 01:39:55.000000000 -0500
@@ -13,7 +13,14 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
+#include <linux/percpu.h>
 #include <net/checksum.h>
+#include <net/inet_hashtables.h>
+#include <net/net_namespace.h>
+#include <net/route.h>
+#include <net/udp.h>
+
+#include <net/netfilter/nf_conntrack.h>
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_MARK.h>
@@ -24,6 +31,9 @@
 MODULE_ALIAS("ipt_MARK");
 MODULE_ALIAS("ip6t_MARK");
 
+/* Per-CPU variable defined elsewhere; mark_tg() stores the xid in it */
+DECLARE_PER_CPU(int, sknid_elevator);
+
 static unsigned int
 mark_tg_v0(struct sk_buff *skb, const struct net_device *in,
            const struct net_device *out, unsigned int hooknum,
@@ -61,14 +71,288 @@
 	return XT_CONTINUE;
 }
 
+/* True when a peer-credential id is set: neither 0 nor (unsigned int)-1. */
+#define PEERCRED_SET(x) (((x) != 0) && ((x) != (unsigned int)-1))
+
+/*
+ * Destination "port" of a conntrack tuple, returned as stored in the tuple.
+ * For ICMP the echo ID held in the source half of the tuple is used.
+ */
+static inline u_int16_t
+get_dst_port(struct nf_conntrack_tuple *tuple)
+{
+	switch (tuple->dst.protonum) {
+	case IPPROTO_GRE:
+		/* GRE key, returned unconverted */
+		return tuple->dst.u.gre.key;
+	case IPPROTO_ICMP:
+		/* Bind on ICMP echo ID */
+		return tuple->src.u.icmp.id;
+	case IPPROTO_TCP:
+		return tuple->dst.u.tcp.port;
+	case IPPROTO_UDP:
+		return tuple->dst.u.udp.port;
+	default:
+		return tuple->dst.u.all;
+	}
+}
+
+/*
+ * Source "port" of a conntrack tuple, returned as stored in the tuple.
+ * For ICMP the echo ID is used.
+ */
+static inline u_int16_t
+get_src_port(struct nf_conntrack_tuple *tuple)
+{
+	switch (tuple->dst.protonum) {
+	case IPPROTO_GRE:
+		/*
+		 * Returned unconverted, exactly like get_dst_port().
+		 * NOTE(review): assumes gre.key is a 16-bit field, in which
+		 * case the former htons(ntohl(key)) produced 0 on
+		 * little-endian hosts - confirm in nf_conntrack_tuple.h.
+		 */
+		return tuple->src.u.gre.key;
+	case IPPROTO_ICMP:
+		/* Bind on ICMP echo ID */
+		return tuple->src.u.icmp.id;
+	case IPPROTO_TCP:
+		return tuple->src.u.tcp.port;
+	case IPPROTO_UDP:
+		return tuple->src.u.udp.port;
+	default:
+		return tuple->src.u.all;
+	}
+}
+
+/*
+ * Find the UDP socket in @udptable that best matches the given addresses,
+ * ports and interface; returns NULL when nothing matches.  A returned
+ * socket has been sock_hold()ed, so the caller must sock_put() it.
+ */
+static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
+				      __be16 sport, __be32 daddr, __be16 dport,
+				      int dif, struct hlist_head udptable[])
+{
+	struct sock *sk, *result = NULL;
+	struct hlist_node *node;
+	unsigned short hnum = ntohs(dport);
+	int badness = -1;
+
+	read_lock(&udp_hash_lock);
+	sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) {
+		struct inet_sock *inet = inet_sk(sk);
+		int score;
+
+		if (!net_eq(sock_net(sk), net) || sk->sk_hash != hnum ||
+		    ipv6_only_sock(sk))
+			continue;
+
+		score = (sk->sk_family == PF_INET ? 1 : 0);
+
+		if (inet->rcv_saddr) {
+			if (inet->rcv_saddr != daddr)
+				continue;
+			score += 2;
+		} else {
+			/* block non nx_info ips */
+			if (!v4_addr_in_nx_info(sk->sk_nx_info,
+						daddr, NXA_MASK_BIND))
+				continue;
+		}
+		if (inet->daddr) {
+			if (inet->daddr != saddr)
+				continue;
+			score += 2;
+		}
+		if (inet->dport) {
+			if (inet->dport != sport)
+				continue;
+			score += 2;
+		}
+		if (sk->sk_bound_dev_if) {
+			if (sk->sk_bound_dev_if != dif)
+				continue;
+			score += 2;
+		}
+		if (score == 9) {
+			/* highest score the checks above can add up to */
+			result = sk;
+			break;
+		} else if (score > badness) {
+			result = sk;
+			badness = score;
+		}
+	}
+
+	if (result)
+		sock_hold(result);
+	read_unlock(&udp_hash_lock);
+	return result;
+}
+
+/*
+ * mark_tg - MARK target (xt_mark_tginfo2), extended with a copy-xid mode.
+ * A rule mark of ~0U selects copy-xid: the packet mark is derived from the
+ * conntrack xids and from local sockets.  Any other rule sets the mark to
+ * (skb->mark & ~mask) ^ mark.  The verdict is always XT_CONTINUE.
+ */
 static unsigned int
 mark_tg(struct sk_buff *skb, const struct net_device *in,
         const struct net_device *out, unsigned int hooknum,
         const struct xt_target *target, const void *targinfo)
 {
 	const struct xt_mark_tginfo2 *info = targinfo;
+	/* s64: neither a 32-bit mark nor an xid can collide with the -1 sentinel */
+	s64 mark = -1;
+	enum ip_conntrack_info ctinfo;
+	struct sock *connection_sk;
+	int dif;
+	struct nf_conn *ct;
+	extern struct inet_hashinfo tcp_hashinfo;
+	enum ip_conntrack_dir dir;
+	int *curtag;
+	u_int32_t src_ip;
+	u_int32_t dst_ip;
+	u_int16_t proto, src_port;
+	u_int16_t dst_port;
+
+	if (info->mark == ~0U) {
+		/*
+		 * As of 2.6.27.39, Dec 8 2009,
+		 * NetNS + VNET = Trouble
+		 * Let's handle this as a special case
+		 */
+		struct net *net = dev_net(in ? in : out);
+
+		if (!net_eq(net, &init_net)) {
+			WARN_ON_ONCE(1);
+			return XT_CONTINUE;
+		}
+
+		/*
+		 * copy-xid.  skb->dst may not be set on every hook, so fall
+		 * back to the input device index (0 if there is none).
+		 */
+		if (skb->dst)
+			dif = ((struct rtable *)(skb->dst))->rt_iif;
+		else
+			dif = in ? in->ifindex : 0;
+
+		ct = nf_ct_get(skb, &ctinfo);
+		if (!ct)
+			goto out_mark_finish;
+
+		dir = CTINFO2DIR(ctinfo);
+		src_ip = ct->tuplehash[dir].tuple.src.u3.ip;
+		dst_ip = ct->tuplehash[dir].tuple.dst.u3.ip;
+		src_port = get_src_port(&ct->tuplehash[dir].tuple);
+		dst_port = get_dst_port(&ct->tuplehash[dir].tuple);
+		proto = ct->tuplehash[dir].tuple.dst.protonum;
+
+		if (proto == IPPROTO_ICMP) {
+			if (skb->mark > 0)
+				/* The packet is marked, it's going out */
+				ct->xid[IP_CT_DIR_ORIGINAL] = skb->mark;
+
+			if (ct->xid[IP_CT_DIR_ORIGINAL] > 0)
+				mark = ct->xid[IP_CT_DIR_ORIGINAL];
+		} else if (proto == IPPROTO_UDP) {
+			struct sock *sk;
+
+			if (!skb->mark) {
+				sk = __udp4_lib_lookup(net, src_ip, src_port,
+						       dst_ip, dst_port, dif,
+						       udp_hash);
+
+				if (sk && hooknum == NF_INET_LOCAL_IN)
+					mark = sk->sk_nid;
+
+				if (sk)
+					sock_put(sk);
+			} else if (skb->mark > 0) {
+				/* The packet is marked, it's going out */
+				ct->xid[IP_CT_DIR_ORIGINAL] = skb->mark;
+			}
+		} else if (proto == IPPROTO_TCP) {
+			int sockettype = 0; /* Established socket */
+
+			/* Looks for an established socket or a listening
+			   socket corresponding to the 4-tuple, in that order.
+			   The order is important for Codemux connections
+			   to be handled properly */
+
+			connection_sk = inet_lookup_established(net,
+					&tcp_hashinfo, src_ip, src_port,
+					dst_ip, dst_port, dif);
+
+			if (!connection_sk) {
+				connection_sk = inet_lookup_listener(net,
+						&tcp_hashinfo, dst_ip,
+						dst_port, dif);
+				sockettype = 1; /* Listening socket */
+			}
+
+			if (connection_sk) {
+				if (connection_sk->sk_state == TCP_TIME_WAIT) {
+					inet_twsk_put(inet_twsk(connection_sk));
+					goto out_mark_finish;
+				}
+
+				if (!PEERCRED_SET(connection_sk->sk_peercred.uid)
+				    && ct->xid[!dir] > 0 && (sockettype == 0)) {
+					/* The peercred is not set. We set it
+					   if the other side has an xid. */
+					connection_sk->sk_peercred.gid =
+						connection_sk->sk_peercred.uid =
+						ct->xid[!dir];
+				} else if (PEERCRED_SET(connection_sk->sk_peercred.uid) &&
+					   (connection_sk->sk_peercred.uid != ct->xid[!dir]) &&
+					   (sockettype == 0)) {
+					/* The peercred is set, and is not equal
+					   to the XID of 'the other side' */
+					mark = connection_sk->sk_peercred.uid;
+				}
+
+				/* Has this connection already been tagged? */
+				if (ct->xid[dir] < 1) {
+					/* No - let's tag it */
+					ct->xid[dir] = connection_sk->sk_nid;
+				}
+
+				if (mark == -1 && (ct->xid[dir] != 0))
+					mark = ct->xid[dir];
+
+				sock_put(connection_sk);
+			}
+
+			/* All else failed. Is this a connection over raw sockets?
+			   That explains why we couldn't get anything out of skb->sk,
+			   or look up a "real" connection. */
+			if (ct->xid[dir] < 1) {
+				if (skb->skb_tag)
+					ct->xid[dir] = skb->skb_tag;
+			}
+
+			/* Covers CoDemux case */
+			if (mark < 1 && (ct->xid[dir] > 0))
+				mark = ct->xid[dir];
+
+			if (mark < 1 && (ct->xid[!dir] > 0))
+				mark = ct->xid[!dir];
+		}
+	} else {
+		mark = (skb->mark & ~info->mask) ^ info->mark;
+	}
+
+out_mark_finish:
+	if (mark != -1)
+		skb->mark = mark;
+
+	curtag = &__get_cpu_var(sknid_elevator);
+	if (mark > 0 && *curtag == -2 && hooknum == NF_INET_LOCAL_IN)
+		*curtag = mark;
 
-	skb->mark = (skb->mark & ~info->mask) ^ info->mark;
 	return XT_CONTINUE;
 }
 
NOTE(review): the header names of the #include lines in xt_SETXID.c were lost
in the copy this patch was restored from.  They were re-derived from the
symbols the code uses and from xt_MARK.c - confirm against the original patch.
diff -Nurb linux-2.6.27-521/net/netfilter/xt_SETXID.c linux-2.6.27-522/net/netfilter/xt_SETXID.c
--- linux-2.6.27-521/net/netfilter/xt_SETXID.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.6.27-522/net/netfilter/xt_SETXID.c	2009-12-10 12:09:35.000000000 -0500
@@ -0,0 +1,87 @@
+/*
+ * xt_SETXID - netfilter target that stores a rule-supplied xid in skb->skb_tag
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SETXID.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("");
+MODULE_DESCRIPTION("");
+MODULE_ALIAS("ipt_SETXID");
+
+/*
+ * Target hook.  For XT_SET_PACKET_XID the rule's mark is written to
+ * skb->skb_tag; the verdict is always XT_CONTINUE.
+ */
+static unsigned int
+target_v1(struct sk_buff *skb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  unsigned int hooknum,
+	  const struct xt_target *target,
+	  const void *targinfo)
+{
+	const struct xt_setxid_target_info_v1 *setxidinfo = targinfo;
+
+	switch (setxidinfo->mode) {
+	case XT_SET_PACKET_XID:
+		skb->skb_tag = setxidinfo->mark;
+		break;
+	default:
+		/* checkentry_v1() refuses rules with any other mode */
+		break;
+	}
+	return XT_CONTINUE;
+}
+
+/* Rule validation: accept only XT_SET_PACKET_XID, warn about anything else. */
+static bool
+checkentry_v1(const char *tablename,
+	      const void *entry,
+	      const struct xt_target *target,
+	      void *targinfo,
+	      unsigned int hook_mask)
+{
+	struct xt_setxid_target_info_v1 *setxidinfo = targinfo;
+
+	if (setxidinfo->mode != XT_SET_PACKET_XID) {
+		printk(KERN_WARNING "SETXID: unknown mode %u\n",
+		       setxidinfo->mode);
+		return false;
+	}
+
+	return true;
+}
+
+static struct xt_target xt_setxid_target[] = {
+	{
+		.name		= "SETXID",
+		.family		= AF_INET,
+		.revision	= 1,
+		.checkentry	= checkentry_v1,
+		.target		= target_v1,
+		.targetsize	= sizeof(struct xt_setxid_target_info_v1),
+		.table		= "mangle",
+		.me		= THIS_MODULE,
+	}
+};
+
+static int __init init(void)
+{
+	return xt_register_targets(xt_setxid_target,
+				   ARRAY_SIZE(xt_setxid_target));
+}
+
+static void __exit fini(void)
+{
+	xt_unregister_targets(xt_setxid_target, ARRAY_SIZE(xt_setxid_target));
+}
+
+module_init(init);
+module_exit(fini);