Optimization
[linux-2.6.git] / linux-2.6-525-sknid-elevator.patch
1 diff -Nurb linux-2.6.22-524/net/core/dev.c linux-2.6.22-525/net/core/dev.c
2 --- linux-2.6.22-524/net/core/dev.c     2008-07-15 11:39:32.000000000 -0400
3 +++ linux-2.6.22-525/net/core/dev.c     2008-07-21 16:22:33.000000000 -0400
4 @@ -1131,7 +1131,7 @@
5                 if ((ptype->dev == dev || !ptype->dev) &&
6                     (ptype->af_packet_priv == NULL ||
7                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
8 -                       struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
9 +                       struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
10                         if (!skb2)
11                                 break;
12  
13 @@ -1803,6 +1803,7 @@
14   * the ingress scheduler, you just cant add policies on ingress.
15   *
16   */
17 +
18  static int ing_filter(struct sk_buff *skb)
19  {
20         struct Qdisc *q;
21 @@ -1832,13 +1833,21 @@
22  }
23  #endif
24  
25 +/* Per-CPU value shared with the af_packet taps. The code already assumes that
26 + * packet handlers run sequentially on the same CPU. -Sapan */
27 +DEFINE_PER_CPU(int, sknid_elevator);
28 +
29  int netif_receive_skb(struct sk_buff *skb)
30  {
31         struct packet_type *ptype, *pt_prev;
32         struct net_device *orig_dev;
33         int ret = NET_RX_DROP;
34 +       int *cur_elevator = &__get_cpu_var(sknid_elevator);
35 +       struct sk_buff *skb2 = NULL;    /* copy kept for the second tap pass, if any */
36         __be16 type;
37  
38 +       *cur_elevator = 0;
39 +
40         /* if we've gotten here through NAPI, check netpoll */
41         if (skb->dev->poll && netpoll_rx(skb))
42                 return NET_RX_DROP;
43 @@ -1873,8 +1882,9 @@
44  
45         list_for_each_entry_rcu(ptype, &ptype_all, list) {
46                 if (!ptype->dev || ptype->dev == skb->dev) {
47 -                       if (pt_prev)
48 +                       if (pt_prev) {
49                                 ret = deliver_skb(skb, pt_prev, orig_dev);
50 +                       }
51                         pt_prev = ptype;
52                 }
53         }
54 @@ -1902,6 +1912,14 @@
55         if (!skb)
56                 goto out;
57  
58 +       /* A tap flagged this packet for a second look: keep a private copy,
59 +        * since the protocol handlers may throw skb away - Sapan */
60 +       if (*cur_elevator) {
61 +               skb2 = skb_copy(skb, GFP_ATOMIC);
62 +               if (!skb2) *cur_elevator = 0;
63 +       }
64 +
65 +
66         type = skb->protocol;
67         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
68                 if (ptype->type == type &&
69 @@ -1914,6 +1932,7 @@
70  
71         if (pt_prev) {
72                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
73 +
74         } else {
75                 kfree_skb(skb);
76                 /* Jamal, now you will not able to escape explaining
77 @@ -1922,6 +1941,21 @@
78                 ret = NET_RX_DROP;
79         }
80  
81 +       if (skb2 && *cur_elevator > 0) {
82 +               skb2->skb_tag = *cur_elevator;
83 +               list_for_each_entry_rcu(ptype, &ptype_all, list) {
84 +                       if (!ptype->dev || ptype->dev == skb2->dev) {
85 +                               ret = deliver_skb(skb2, ptype, orig_dev);
86 +                       }
87 +               }
88 +       }
89 +       if (skb2) {
90 +               /* Release the copy whenever one was made, tagged or not */
91 +               kfree_skb(skb2);
92 +       }
93 +
94 +       *cur_elevator = 0;
95 +
96  out:
97         rcu_read_unlock();
98         return ret;
99 @@ -3780,6 +3814,7 @@
100  EXPORT_SYMBOL(net_enable_timestamp);
101  EXPORT_SYMBOL(net_disable_timestamp);
102  EXPORT_SYMBOL(dev_get_flags);
103 +EXPORT_PER_CPU_SYMBOL(sknid_elevator);
104  
105  #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
106  EXPORT_SYMBOL(br_handle_frame_hook);
107 diff -Nurb linux-2.6.22-524/net/packet/af_packet.c linux-2.6.22-525/net/packet/af_packet.c
108 --- linux-2.6.22-524/net/packet/af_packet.c     2007-07-08 19:32:17.000000000 -0400
109 +++ linux-2.6.22-525/net/packet/af_packet.c     2008-07-15 11:40:11.000000000 -0400
110 @@ -78,6 +78,7 @@
111  #include <linux/poll.h>
112  #include <linux/module.h>
113  #include <linux/init.h>
114 +#include <linux/vs_network.h>
115  
116  #ifdef CONFIG_INET
117  #include <net/inet_common.h>
118 @@ -246,10 +247,13 @@
119  
120  static const struct proto_ops packet_ops_spkt;
121  
122 +DECLARE_PER_CPU(int, sknid_elevator);      /* defined in net/core/dev.c */
123  static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
124  {
125         struct sock *sk;
126         struct sockaddr_pkt *spkt;
127 +       int tag = skb->skb_tag;
128 +       int *elevator=&__get_cpu_var(sknid_elevator);
129  
130         /*
131          *      When we registered the protocol we saved the socket in the data
132 @@ -269,6 +273,22 @@
133          *      so that this procedure is noop.
134          */
135  
136 +       /* 
137 +        * (18:05:41) daniel_hozac: where?
138 +        * (18:05:58) daniel_hozac: we already have filters on PF_PACKET, don't we?
139 +        * (18:05:58) er: in packet_rcv_skpt
140 +        * (18:07:33) daniel_hozac: oh, that's evil. 
141 +        */
142 +
143 +       if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
144 +               *elevator=-2;
145 +               goto out;
146 +       }
147 +       else if (!sk->sk_nx_info && *elevator) {
148 +               /* Root has already seen this packet */
149 +               goto out;
150 +       }
151 +
152         if (skb->pkt_type == PACKET_LOOPBACK)
153                 goto out;
154  
155 @@ -324,6 +344,9 @@
156         __be16 proto=0;
157         int err;
158  
159 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
160 +               return -EPERM;
161 +
162         /*
163          *      Get and verify the address.
164          */
165 @@ -420,6 +443,17 @@
166                                       unsigned int res)
167  {
168         struct sk_filter *filter;
169 +       int tag = skb->skb_tag;
170 +       int *elevator=&__get_cpu_var(sknid_elevator);
171 +
172 +       if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
173 +               *elevator=-2;
174 +               return 0;
175 +       }
176 +       else if (!sk->sk_nx_info && *elevator) {
177 +               /* Root has already seen this packet */
178 +               return 0;
179 +       }
180  
181         rcu_read_lock_bh();
182         filter = rcu_dereference(sk->sk_filter);
183 @@ -711,6 +745,9 @@
184         unsigned char *addr;
185         int ifindex, err, reserve = 0;
186  
187 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) 
188 +               return -EPERM;
189 +
190         /*
191          *      Get and verify the address.
192          */
193 @@ -984,8 +1021,9 @@
194         __be16 proto = (__force __be16)protocol; /* weird, but documented */
195         int err;
196  
197 -       if (!capable(CAP_NET_RAW))
198 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET))
199                 return -EPERM;
200 +               
201         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
202             sock->type != SOCK_PACKET)
203                 return -ESOCKTNOSUPPORT;