From e0c419ad6eed2efa33d93c9cb89f1e5573f83173 Mon Sep 17 00:00:00 2001 From: =?utf8?q?S=2E=C3=87a=C4=9Flar=20Onur?= Date: Thu, 29 Apr 2010 18:03:36 +0000 Subject: [PATCH] more patches --- kernel.spec | 4 + linux-2.6-524-peercred.patch | 31 ++++ linux-2.6-525-sknid-elevator.patch | 239 +++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+) create mode 100644 linux-2.6-524-peercred.patch create mode 100644 linux-2.6-525-sknid-elevator.patch diff --git a/kernel.spec b/kernel.spec index 6864f9da6..b921e8392 100644 --- a/kernel.spec +++ b/kernel.spec @@ -1858,6 +1858,8 @@ Patch90521: linux-2.6-521-packet-tagging.patch #Patch90522: linux-2.6-522-iptables-connection-tagging.patch # Patch90523: linux-2.6-523-raw-sockets.patch +Patch90524: linux-2.6-524-peercred.patch +Patch90525: linux-2.6-525-sknid-elevator.patch # empty final patch file to facilitate testing of kernel patches Patch99999: linux-kernel-test.patch @@ -3483,6 +3485,8 @@ ApplyPatch linux-2.6-521-packet-tagging.patch #ApplyPatch linux-2.6-522-iptables-connection-tagging.patch # ApplyPatch linux-2.6-523-raw-sockets.patch +ApplyPatch linux-2.6-524-peercred.patch +ApplyPatch linux-2.6-525-sknid-elevator.patch ApplyOptionalPatch linux-kernel-test.patch diff --git a/linux-2.6-524-peercred.patch b/linux-2.6-524-peercred.patch new file mode 100644 index 000000000..3ed895c0a --- /dev/null +++ b/linux-2.6-524-peercred.patch @@ -0,0 +1,31 @@ +commit 1707cc914729121d784b444ce64cbea791f8c2e8 +Author: root +Date: Thu Apr 29 09:56:51 2010 -0400 + + linux-2.6-524-peercred.patch + +diff --git a/include/linux/socket.h b/include/linux/socket.h +index 3273a0c..251b1b1 100644 +--- a/include/linux/socket.h ++++ b/include/linux/socket.h +@@ -292,6 +292,8 @@ struct ucred { + #define SOL_RDS 276 + #define SOL_IUCV 277 + ++#define SO_SETXID SO_PEERCRED ++ + /* IPX options */ + #define IPX_TYPE 1 + +diff --git a/net/core/sock.c b/net/core/sock.c +index ac135b9..3d808cc 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ 
-542,6 +542,7 @@ set_sndbuf: + } + sk->sk_xid = val; + sk->sk_nid = val; ++ sk->sk_peercred.uid = sk->sk_peercred.gid = val; + break; + + case SO_RCVBUF: diff --git a/linux-2.6-525-sknid-elevator.patch b/linux-2.6-525-sknid-elevator.patch new file mode 100644 index 000000000..87d88fcf8 --- /dev/null +++ b/linux-2.6-525-sknid-elevator.patch @@ -0,0 +1,239 @@ +commit a57d715bc58005cfae0fdf1626ebf11b11508025 +Author: root +Date: Thu Apr 29 10:01:21 2010 -0400 + + linux-2.6-525-sknid-elevator.patch + +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index 4267c8b..3f36a91 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -1057,6 +1057,7 @@ struct napi_gro_cb { + struct packet_type { + __be16 type; /* This is really htons(ether_type). */ + struct net_device *dev; /* NULL is wildcarded here */ ++ unsigned char sknid_elevator; + int (*func) (struct sk_buff *, + struct net_device *, + struct packet_type *, +diff --git a/net/core/dev.c b/net/core/dev.c +index 8b6b941..651a1c3 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -99,6 +99,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -2275,6 +2277,10 @@ void netif_nit_deliver(struct sk_buff *skb) + rcu_read_unlock(); + } + ++/* The code already makes the assumption that packet handlers run ++ * sequentially on the same CPU. 
-Sapan */ ++DEFINE_PER_CPU(int, sknid_elevator) = 0; ++ + /** + * netif_receive_skb - process receive buffer from network + * @skb: buffer to process +@@ -2296,8 +2302,11 @@ int netif_receive_skb(struct sk_buff *skb) + struct net_device *orig_dev; + struct net_device *null_or_orig; + int ret = NET_RX_DROP; ++ int *cur_elevator = &__get_cpu_var(sknid_elevator); + __be16 type; + ++ *cur_elevator = 0; ++ + if (!skb->tstamp.tv64) + net_timestamp(skb); + +@@ -2373,7 +2382,27 @@ ncls: + } + + if (pt_prev) { ++ /* At this point, cur_elevator may be -2 or a positive value, in ++ * case a previous protocol handler marked it */ ++ if (*cur_elevator) { ++ atomic_inc(&skb->users); ++ } ++ + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); ++ ++ if ((*cur_elevator)>0) { ++ skb->skb_tag = *cur_elevator; ++ list_for_each_entry_rcu(ptype, &ptype_all, list) { ++ if ((!ptype->dev || ptype->dev == skb->dev) && (ptype->sknid_elevator)) { ++ ret = deliver_skb(skb, ptype, orig_dev); ++ } ++ } ++ } ++ ++ if (*cur_elevator) { ++ /* We have a packet */ ++ kfree_skb(skb); ++ } + } else { + kfree_skb(skb); + /* Jamal, now you will not able to escape explaining +@@ -4127,6 +4156,7 @@ unsigned dev_get_flags(const struct net_device *dev) + return flags; + } + EXPORT_SYMBOL(dev_get_flags); ++EXPORT_PER_CPU_SYMBOL(sknid_elevator); + + /** + * dev_change_flags - change device settings +diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c +index 1bd109e..5c2e9ad 100644 +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -78,6 +78,7 @@ + #include + #include + #include ++#include + #include + + #ifdef CONFIG_INET +@@ -337,12 +338,54 @@ static const struct proto_ops packet_ops; + + static const struct proto_ops packet_ops_spkt; + ++DECLARE_PER_CPU(int, sknid_elevator); ++ ++static inline unsigned int slice_check_and_elevate(struct sk_buff *skb, struct sock *sk) { ++ /* This mechanism is quite involved, and caused us a lot of pain ++ * including crashes and packet loss 
during the 4.2 rollout. This ++ * function decides if a slice is allowed to see a given packet. ++ * Unfortunately, the first time it is invoked for a packet it does not ++ * have enough information to make this call, since xt_MARK has not had ++ * a chance to tag it with the slice id. There is also no way of ++ * passing state between xt_MARK and this function through a packet -- ++ * because the skb gets cloned quite a few times between these two ++ * points. I'd rather not use skb_shared_info because it's treated as ++ * a blob of memory, and so it would be quite hard to maintain. ++ * ++ * What we do is to keep a global variable (per CPU) that transfers the ++ * required state between xt_MARK and af_packet.c. As an optimization, ++ * this state transfer and the step that follows is only executed for ++ * packets that first get dropped here. When we drop a packet, we mark ++ * it for 'elevation' (that's what this trick is called). When xt_MARK ++ * tags the packet with the right slice, it intercepts this mark and ++ * sets the value of sknid_elevator. Next, the packet is sent back here ++ * for a second round, this time with the xid tag set. ++ */ ++ ++ int *elevator=&__get_cpu_var(sknid_elevator); ++ int tag = skb->skb_tag; ++ ++ if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) { ++ if (skb->pkt_type==PACKET_HOST) { ++ *elevator=-2; /* Rejecting this packet. Mark it for elevation in xt_MARK */ ++ } ++ return 0; ++ } ++ else if (!sk->sk_nx_info && (*elevator>0)) { ++ /* Root has already seen this packet once, since it has been elevated */ ++ return 0; ++ } ++ ++ return 1; ++} ++ + static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) + { + struct sock *sk; + struct sockaddr_pkt *spkt; +- ++ int tag = skb->skb_tag; ++ + /* + * When we registered the protocol we saved the socket in the data + * field for just this event. 
+@@ -361,6 +404,16 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, + * so that this procedure is noop. + */ + ++ /* ++ * (18:05:41) daniel_hozac: where? ++ * (18:05:58) daniel_hozac: we already have filters on PF_PACKET, don't we? ++ * (18:05:58) er: in packet_rcv_skpt ++ * (18:07:33) daniel_hozac: oh, that's evil. ++ */ ++ ++ if (!slice_check_and_elevate(skb, sk)) ++ goto out; ++ + if (skb->pkt_type == PACKET_LOOPBACK) + goto out; + +@@ -419,6 +472,9 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, + __be16 proto = 0; + int err; + ++ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) ++ return -EPERM; ++ + /* + * Get and verify the address. + */ +@@ -509,11 +565,16 @@ out_unlock: + return err; + } + ++ ++ + static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, + unsigned int res) + { + struct sk_filter *filter; + ++ if (!slice_check_and_elevate(skb, sk)) ++ return 0; ++ + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter != NULL) +@@ -1063,6 +1124,9 @@ static int packet_snd(struct socket *sock, + unsigned char *addr; + int ifindex, err, reserve = 0; + ++ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) ++ return -EPERM; ++ + /* + * Get and verify the address. + */ +@@ -1248,6 +1312,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc + + po->num = protocol; + po->prot_hook.type = protocol; ++ po->prot_hook.sknid_elevator = 1; + po->prot_hook.dev = dev; + + po->ifindex = dev ? 
dev->ifindex : 0; +@@ -1348,8 +1413,9 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, + __be16 proto = (__force __be16)protocol; /* weird, but documented */ + int err; + +- if (!capable(CAP_NET_RAW)) ++ if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET)) + return -EPERM; ++ + if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && + sock->type != SOCK_PACKET) + return -ESOCKTNOSUPPORT; +@@ -1381,6 +1447,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, + spin_lock_init(&po->bind_lock); + mutex_init(&po->pg_vec_lock); + po->prot_hook.func = packet_rcv; ++ po->prot_hook.sknid_elevator = 1; + + if (sock->type == SOCK_PACKET) + po->prot_hook.func = packet_rcv_spkt; -- 2.43.0