tcpdump/tcp problem
[linux-2.6.git] / linux-2.6-525-sknid-elevator.patch
1 diff -Nurb linux-2.6.22-524/net/core/dev.c linux-2.6.22-525/net/core/dev.c
2 --- linux-2.6.22-524/net/core/dev.c     2008-07-15 11:39:32.000000000 -0400
3 +++ linux-2.6.22-525/net/core/dev.c     2008-07-21 15:20:43.000000000 -0400
4 @@ -1131,7 +1131,7 @@
5                 if ((ptype->dev == dev || !ptype->dev) &&
6                     (ptype->af_packet_priv == NULL ||
7                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
8 -                       struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
9 +                       struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
10                         if (!skb2)
11                                 break;
12  
13 @@ -1803,6 +1803,7 @@
14   * the ingress scheduler, you just cant add policies on ingress.
15   *
16   */
17 +
18  static int ing_filter(struct sk_buff *skb)
19  {
20         struct Qdisc *q;
21 @@ -1832,13 +1833,21 @@
22  }
23  #endif
24  
25 +/* Set nonzero by a packet handler to have the taps see the skb again.
26 + * Assumes packet handlers run sequentially on the same CPU. -Sapan */
27 +DEFINE_PER_CPU(int, sknid_elevator);
28 +
29  int netif_receive_skb(struct sk_buff *skb)
30  {
31         struct packet_type *ptype, *pt_prev;
32         struct net_device *orig_dev;
33         int ret = NET_RX_DROP;
34 +       int *cur_elevator=&__get_cpu_var(sknid_elevator);
35 +       struct sk_buff *skb2 = NULL;    /* copy kept for the tap re-run */
36         __be16 type;
37  
38 +       *cur_elevator = 0;
39 +
40         /* if we've gotten here through NAPI, check netpoll */
41         if (skb->dev->poll && netpoll_rx(skb))
42                 return NET_RX_DROP;
43 @@ -1873,8 +1882,9 @@
44  
45         list_for_each_entry_rcu(ptype, &ptype_all, list) {
46                 if (!ptype->dev || ptype->dev == skb->dev) {
47 -                       if (pt_prev)
48 +                       if (pt_prev) {
49                                 ret = deliver_skb(skb, pt_prev, orig_dev);
50 +                       }
51                         pt_prev = ptype;
52                 }
53         }
54 @@ -1902,6 +1912,14 @@
55         if (!skb)
56                 goto out;
57  
58 +       /* The handlers below consume skb, so copy it now if the taps are
59 +        * to see it again; without a copy there is no re-run. - Sapan */
60 +       if (*cur_elevator) {
61 +               skb2 = skb_copy(skb,GFP_ATOMIC);
62 +               if (!skb2) *cur_elevator=0;
63 +       }
64 +
65 +
66         type = skb->protocol;
67         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
68                 if (ptype->type == type &&
69 @@ -1914,6 +1932,7 @@
70  
71         if (pt_prev) {
72                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
73 +               
74         } else {
75                 kfree_skb(skb);
76                 /* Jamal, now you will not able to escape explaining
77 @@ -1922,6 +1941,18 @@
78                 ret = NET_RX_DROP;
79         }
80  
81 +       if (*cur_elevator && skb2) {    /* flag may be raised after the copy point */
82 +               skb2->skb_tag = *cur_elevator;
83 +               list_for_each_entry_rcu(ptype, &ptype_all, list) {
84 +                       if (!ptype->dev || ptype->dev == skb2->dev) {
85 +                               ret = deliver_skb(skb2, ptype, orig_dev);
86 +                       }
87 +               }
88 +               /* Release our reference to the copy */
89 +               kfree_skb(skb2);
90 +       }
91 +       *cur_elevator=0;
92 +
93  out:
94         rcu_read_unlock();
95         return ret;
96 @@ -3780,6 +3811,7 @@
97  EXPORT_SYMBOL(net_enable_timestamp);
98  EXPORT_SYMBOL(net_disable_timestamp);
99  EXPORT_SYMBOL(dev_get_flags);
100 +EXPORT_PER_CPU_SYMBOL(sknid_elevator);
101  
102  #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
103  EXPORT_SYMBOL(br_handle_frame_hook);
104 diff -Nurb linux-2.6.22-524/net/packet/af_packet.c linux-2.6.22-525/net/packet/af_packet.c
105 --- linux-2.6.22-524/net/packet/af_packet.c     2007-07-08 19:32:17.000000000 -0400
106 +++ linux-2.6.22-525/net/packet/af_packet.c     2008-07-15 11:40:11.000000000 -0400
107 @@ -78,6 +78,7 @@
108  #include <linux/poll.h>
109  #include <linux/module.h>
110  #include <linux/init.h>
111 +#include <linux/vs_network.h>
112  
113  #ifdef CONFIG_INET
114  #include <net/inet_common.h>
115 @@ -246,10 +247,13 @@
116  
117  static const struct proto_ops packet_ops_spkt;
118  
119 +DECLARE_PER_CPU(int, sknid_elevator);  /* defined in net/core/dev.c */
120  static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
121  {
122         struct sock *sk;
123         struct sockaddr_pkt *spkt;
124 +       int tag = skb->skb_tag;
125 +       int *elevator=&__get_cpu_var(sknid_elevator);   /* this CPU's tap re-run request flag */
126  
127         /*
128          *      When we registered the protocol we saved the socket in the data
129 @@ -269,6 +273,22 @@
130          *      so that this procedure is noop.
131          */
132  
133 +       /* 
134 +        * (18:05:41) daniel_hozac: where?
135 +        * (18:05:58) daniel_hozac: we already have filters on PF_PACKET, don't we?
136 +        * (18:05:58) er: in packet_rcv_spkt
137 +        * (18:07:33) daniel_hozac: oh, that's evil. 
138 +        */
139 +
140 +       if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
141 +               *elevator=-2;
142 +               goto out;
143 +       }
144 +       else if (!sk->sk_nx_info && *elevator) {
145 +               /* Root has already seen this packet */
146 +               goto out;
147 +       }
148 +
149         if (skb->pkt_type == PACKET_LOOPBACK)
150                 goto out;
151  
152 @@ -324,6 +344,9 @@
153         __be16 proto=0;
154         int err;
155  
156 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND))
157 +               return -EPERM;
158 +
159         /*
160          *      Get and verify the address.
161          */
162 @@ -420,6 +443,17 @@
163                                       unsigned int res)
164  {
165         struct sk_filter *filter;
166 +       int tag = skb->skb_tag;
167 +       int *elevator=&__get_cpu_var(sknid_elevator);
168 +
169 +       if (sk->sk_nx_info && !(tag == 1 || sk->sk_nid == tag)) {
170 +               *elevator=-2;
171 +               return 0;
172 +       }
173 +       else if (!sk->sk_nx_info && *elevator) {
174 +               /* Root has already seen this packet */
175 +               return 0;
176 +       }
177  
178         rcu_read_lock_bh();
179         filter = rcu_dereference(sk->sk_filter);
180 @@ -711,6 +745,9 @@
181         unsigned char *addr;
182         int ifindex, err, reserve = 0;
183  
184 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SEND)) 
185 +               return -EPERM;
186 +
187         /*
188          *      Get and verify the address.
189          */
190 @@ -984,8 +1021,9 @@
191         __be16 proto = (__force __be16)protocol; /* weird, but documented */
192         int err;
193  
194 -       if (!capable(CAP_NET_RAW))
195 +       if (!nx_capable(CAP_NET_RAW, NXC_RAW_SOCKET))
196                 return -EPERM;
197 +               
198         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
199             sock->type != SOCK_PACKET)
200                 return -ESOCKTNOSUPPORT;