adding the patch file too
[linux-2.6.git] / linux-2.6-525-sknid-elevator.patch
1 diff -Nurb linux-2.6.22-510/net/core/dev.c linux-2.6.22-520/net/core/dev.c
2 --- linux-2.6.22-510/net/core/dev.c     2008-06-06 17:07:48.000000000 -0400
3 +++ linux-2.6.22-520/net/core/dev.c     2008-06-06 17:07:56.000000000 -0400
4 @@ -1803,6 +1803,7 @@
5   * the ingress scheduler, you just cant add policies on ingress.
6   *
7   */
8 +
9  static int ing_filter(struct sk_buff *skb)
10  {
11         struct Qdisc *q;
12 @@ -1832,13 +1833,20 @@
13  }
14  #endif
15  
16 +/* Per-CPU value set by packet handlers and read by netif_receive_skb();
17 + * this relies on handlers running sequentially on the same CPU. -Sapan */
18 +DEFINE_PER_CPU(int, sknid_elevator);
19 +
20  int netif_receive_skb(struct sk_buff *skb)
21  {
22         struct packet_type *ptype, *pt_prev;
23         struct net_device *orig_dev;
24         int ret = NET_RX_DROP;
25 +       int *cur_elevator=&__get_cpu_var(sknid_elevator);
26         __be16 type;
27  
28 +       *cur_elevator = 0;
29 +
30         /* if we've gotten here through NAPI, check netpoll */
31         if (skb->dev->poll && netpoll_rx(skb))
32                 return NET_RX_DROP;
33 @@ -1873,8 +1881,9 @@
34  
35         list_for_each_entry_rcu(ptype, &ptype_all, list) {
36                 if (!ptype->dev || ptype->dev == skb->dev) {
37 -                       if (pt_prev)
38 +                       if (pt_prev) {
39                                 ret = deliver_skb(skb, pt_prev, orig_dev);
40 +                       }
41                         pt_prev = ptype;
42                 }
43         }
44 @@ -1912,8 +1921,22 @@
45                 }
46         }
47  
48 +       /* Take an extra reference so the packet handler cannot free the
49 +        * skb if we want the taps to see it again - Sapan */
50 +       if (*cur_elevator) {
51 +               atomic_inc(&skb->users);
52 +       }
53 +
54         if (pt_prev) {
55                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
56 +                       if (*cur_elevator > 0) {
57 +                               skb->skb_tag = *cur_elevator;
58 +                               list_for_each_entry_rcu(ptype, &ptype_all, list) {
59 +                                       if (!ptype->dev || ptype->dev == skb->dev) {
60 +                                                       ret = deliver_skb(skb, ptype, orig_dev);
61 +                                       }
62 +                               }
63 +                       }
64         } else {
65                 kfree_skb(skb);
66                 /* Jamal, now you will not able to escape explaining
67 @@ -1922,6 +1945,13 @@
68                 ret = NET_RX_DROP;
69         }
70  
71 +       if (*cur_elevator) {
72 +               /* Drop the extra reference taken before the handler ran */
73 +               kfree_skb(skb);
74 +       }
75 +
76 +       *cur_elevator=0;
77 +
78  out:
79         rcu_read_unlock();
80         return ret;
81 @@ -3780,6 +3810,7 @@
82  EXPORT_SYMBOL(net_enable_timestamp);
83  EXPORT_SYMBOL(net_disable_timestamp);
84  EXPORT_SYMBOL(dev_get_flags);
85 +EXPORT_PER_CPU_SYMBOL(sknid_elevator);
86  
87  #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
88  EXPORT_SYMBOL(br_handle_frame_hook);
89 diff -Nurb linux-2.6.22-510/net/packet/af_packet.c linux-2.6.22-520/net/packet/af_packet.c
90 --- linux-2.6.22-510/net/packet/af_packet.c     2007-07-08 19:32:17.000000000 -0400
91 +++ linux-2.6.22-520/net/packet/af_packet.c     2008-06-07 18:30:41.000000000 -0400
92 @@ -78,6 +78,7 @@
93  #include <linux/poll.h>
94  #include <linux/module.h>
95  #include <linux/init.h>
96 +#include <linux/vs_network.h>
97  
98  #ifdef CONFIG_INET
99  #include <net/inet_common.h>
100 @@ -246,10 +247,13 @@
101  
102  static const struct proto_ops packet_ops_spkt;
103  
104 +DECLARE_PER_CPU(int, sknid_elevator); /* defined in net/core/dev.c */
105  static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
106  {
107         struct sock *sk;
108         struct sockaddr_pkt *spkt;
109 +       int tag = skb->skb_tag;
110 +       int *elevator=&__get_cpu_var(sknid_elevator);
111  
112         /*
113          *      When we registered the protocol we saved the socket in the data
114 @@ -269,6 +273,22 @@
115          *      so that this procedure is noop.
116          */
117  
118 +       /* 
119 +        * (18:05:41) daniel_hozac: where?
120 +        * (18:05:58) daniel_hozac: we already have filters on PF_PACKET, don't we?
121 +        * (18:05:58) er: in packet_rcv_spkt
122 +        * (18:07:33) daniel_hozac: oh, that's evil. 
123 +        */
124 +
125 +       if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
126 +               *elevator=-2;
127 +               goto out;
128 +       }
129 +       else if (!sk->sk_nx_info && *elevator) {
130 +               /* Root has already seen this packet */
131 +               goto out;
132 +       }
133 +
134         if (skb->pkt_type == PACKET_LOOPBACK)
135                 goto out;
136  
137 @@ -324,6 +344,9 @@
138         __be16 proto=0;
139         int err;
140  
141 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
142 +               return -EPERM;
143 +
144         /*
145          *      Get and verify the address.
146          */
147 @@ -420,6 +443,17 @@
148                                       unsigned int res)
149  {
150         struct sk_filter *filter;
151 +       int tag = skb->skb_tag;
152 +       int *elevator=&__get_cpu_var(sknid_elevator);
153 +
154 +       if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
155 +               *elevator=-2;
156 +               return 0;
157 +       }
158 +       else if (!sk->sk_nx_info && *elevator) {
159 +               /* Root has already seen this packet */
160 +               return 0;
161 +       }
162  
163         rcu_read_lock_bh();
164         filter = rcu_dereference(sk->sk_filter);
165 @@ -711,6 +745,9 @@
166         unsigned char *addr;
167         int ifindex, err, reserve = 0;
168  
169 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) 
170 +               return -EPERM;
171 +
172         /*
173          *      Get and verify the address.
174          */
175 @@ -984,8 +1021,9 @@
176         __be16 proto = (__force __be16)protocol; /* weird, but documented */
177         int err;
178  
179 -       if (!capable(CAP_NET_RAW))
180 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET))
181                 return -EPERM;
182 +               
183         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
184             sock->type != SOCK_PACKET)
185                 return -ESOCKTNOSUPPORT;