ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / net / core / netfilter.c
1 /* netfilter.c: look after the filters for various protocols. 
2  * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
3  *
4  * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
5  * way.
6  *
7  * Rusty Russell (C)2000 -- This code is GPL.
8  *
9  * February 2000: Modified by James Morris to have 1 queue per protocol.
10  * 15-Mar-2000:   Added NF_REPEAT --RR.
11  * 08-May-2003:   Internal logging interface added by Jozsef Kadlecsik.
12  */
13 #include <linux/config.h>
14 #include <linux/kernel.h>
15 #include <linux/netfilter.h>
16 #include <net/protocol.h>
17 #include <linux/init.h>
18 #include <linux/skbuff.h>
19 #include <linux/wait.h>
20 #include <linux/module.h>
21 #include <linux/interrupt.h>
22 #include <linux/if.h>
23 #include <linux/netdevice.h>
24 #include <linux/inetdevice.h>
25 #include <linux/tcp.h>
26 #include <linux/udp.h>
27 #include <linux/icmp.h>
28 #include <net/sock.h>
29 #include <net/route.h>
30 #include <linux/ip.h>
31
32 /* In this code, we can be waiting indefinitely for userspace to
33  * service a packet if a hook returns NF_QUEUE.  We could keep a count
34  * of skbuffs queued for userspace, and not deregister a hook unless
35  * this is zero, but that sucks.  Now, we simply check when the
36  * packets come back: if the hook is gone, the packet is discarded. */
/* Debug tracing helper: forwards to printk() when CONFIG_NETFILTER_DEBUG is
 * defined.  Otherwise it expands to an empty do/while statement, so a use
 * such as "if (x) NFDEBUG(...); else ..." still parses as one statement. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)  do { } while (0)
#endif
42
43 /* Sockopts only registered and called from user context, so
44    net locking would be overkill.  Also, [gs]etsockopt calls may
45    sleep. */
46 static DECLARE_MUTEX(nf_sockopt_mutex);
47
48 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
49 static LIST_HEAD(nf_sockopts);
50 static spinlock_t nf_hook_lock = SPIN_LOCK_UNLOCKED;
51
52 /* 
53  * A queue handler may be registered for each protocol.  Each is protected by
54  * long term mutex.  The handler must provide an an outfn() to accept packets
55  * for queueing and must reinject all packets it receives, no matter what.
56  */
57 static struct nf_queue_handler_t {
58         nf_queue_outfn_t outfn;
59         void *data;
60 } queue_handler[NPROTO];
61 static rwlock_t queue_handler_lock = RW_LOCK_UNLOCKED;
62
63 int nf_register_hook(struct nf_hook_ops *reg)
64 {
65         struct list_head *i;
66
67         spin_lock_bh(&nf_hook_lock);
68         list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
69                 if (reg->priority < ((struct nf_hook_ops *)i)->priority)
70                         break;
71         }
72         list_add_rcu(&reg->list, i->prev);
73         spin_unlock_bh(&nf_hook_lock);
74
75         synchronize_net();
76         return 0;
77 }
78
79 void nf_unregister_hook(struct nf_hook_ops *reg)
80 {
81         spin_lock_bh(&nf_hook_lock);
82         list_del_rcu(&reg->list);
83         spin_unlock_bh(&nf_hook_lock);
84
85         synchronize_net();
86 }
87
/*
 * Do two ranges with exclusive upper bounds overlap?
 *
 * Treats the arguments as [min1, max1) and [min2, max2).
 * Returns 1 if the ranges share at least one value, 0 otherwise.  An empty
 * range (min == max) never overlaps anything.
 */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	if (max1 <= min2)
		return 0;	/* first range ends before the second starts */
	if (max2 <= min1)
		return 0;	/* second range ends before the first starts */
	return 1;
}
93
94 /* Functions to register sockopt ranges (exclusive). */
95 int nf_register_sockopt(struct nf_sockopt_ops *reg)
96 {
97         struct list_head *i;
98         int ret = 0;
99
100         if (down_interruptible(&nf_sockopt_mutex) != 0)
101                 return -EINTR;
102
103         list_for_each(i, &nf_sockopts) {
104                 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
105                 if (ops->pf == reg->pf
106                     && (overlap(ops->set_optmin, ops->set_optmax, 
107                                 reg->set_optmin, reg->set_optmax)
108                         || overlap(ops->get_optmin, ops->get_optmax, 
109                                    reg->get_optmin, reg->get_optmax))) {
110                         NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
111                                 ops->set_optmin, ops->set_optmax, 
112                                 ops->get_optmin, ops->get_optmax, 
113                                 reg->set_optmin, reg->set_optmax,
114                                 reg->get_optmin, reg->get_optmax);
115                         ret = -EBUSY;
116                         goto out;
117                 }
118         }
119
120         list_add(&reg->list, &nf_sockopts);
121 out:
122         up(&nf_sockopt_mutex);
123         return ret;
124 }
125
126 void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
127 {
128         /* No point being interruptible: we're probably in cleanup_module() */
129  restart:
130         down(&nf_sockopt_mutex);
131         if (reg->use != 0) {
132                 /* To be woken by nf_sockopt call... */
133                 /* FIXME: Stuart Young's name appears gratuitously. */
134                 set_current_state(TASK_UNINTERRUPTIBLE);
135                 reg->cleanup_task = current;
136                 up(&nf_sockopt_mutex);
137                 schedule();
138                 goto restart;
139         }
140         list_del(&reg->list);
141         up(&nf_sockopt_mutex);
142 }
143
144 #ifdef CONFIG_NETFILTER_DEBUG
145 #include <net/ip.h>
146 #include <net/tcp.h>
147 #include <linux/netfilter_ipv4.h>
148
149 static void debug_print_hooks_ip(unsigned int nf_debug)
150 {
151         if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
152                 printk("PRE_ROUTING ");
153                 nf_debug ^= (1 << NF_IP_PRE_ROUTING);
154         }
155         if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
156                 printk("LOCAL_IN ");
157                 nf_debug ^= (1 << NF_IP_LOCAL_IN);
158         }
159         if (nf_debug & (1 << NF_IP_FORWARD)) {
160                 printk("FORWARD ");
161                 nf_debug ^= (1 << NF_IP_FORWARD);
162         }
163         if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
164                 printk("LOCAL_OUT ");
165                 nf_debug ^= (1 << NF_IP_LOCAL_OUT);
166         }
167         if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
168                 printk("POST_ROUTING ");
169                 nf_debug ^= (1 << NF_IP_POST_ROUTING);
170         }
171         if (nf_debug)
172                 printk("Crap bits: 0x%04X", nf_debug);
173         printk("\n");
174 }
175
176 void nf_dump_skb(int pf, struct sk_buff *skb)
177 {
178         printk("skb: pf=%i %s dev=%s len=%u\n", 
179                pf,
180                skb->sk ? "(owned)" : "(unowned)",
181                skb->dev ? skb->dev->name : "(no dev)",
182                skb->len);
183         switch (pf) {
184         case PF_INET: {
185                 const struct iphdr *ip = skb->nh.iph;
186                 __u32 *opt = (__u32 *) (ip + 1);
187                 int opti;
188                 __u16 src_port = 0, dst_port = 0;
189
190                 if (ip->protocol == IPPROTO_TCP
191                     || ip->protocol == IPPROTO_UDP) {
192                         struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
193                         src_port = ntohs(tcp->source);
194                         dst_port = ntohs(tcp->dest);
195                 }
196         
197                 printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
198                        " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
199                        ip->protocol, NIPQUAD(ip->saddr),
200                        src_port, NIPQUAD(ip->daddr),
201                        dst_port,
202                        ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
203                        ntohs(ip->frag_off), ip->ttl);
204
205                 for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
206                         printk(" O=0x%8.8X", *opt++);
207                 printk("\n");
208         }
209         }
210 }
211
212 void nf_debug_ip_local_deliver(struct sk_buff *skb)
213 {
214         /* If it's a loopback packet, it must have come through
215          * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
216          * NF_IP_LOCAL_IN.  Otherwise, must have gone through
217          * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING.  */
218         if (!skb->dev) {
219                 printk("ip_local_deliver: skb->dev is NULL.\n");
220         }
221         else if (strcmp(skb->dev->name, "lo") == 0) {
222                 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
223                                       | (1 << NF_IP_POST_ROUTING)
224                                       | (1 << NF_IP_PRE_ROUTING)
225                                       | (1 << NF_IP_LOCAL_IN))) {
226                         printk("ip_local_deliver: bad loopback skb: ");
227                         debug_print_hooks_ip(skb->nf_debug);
228                         nf_dump_skb(PF_INET, skb);
229                 }
230         }
231         else {
232                 if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
233                                       | (1<<NF_IP_LOCAL_IN))) {
234                         printk("ip_local_deliver: bad non-lo skb: ");
235                         debug_print_hooks_ip(skb->nf_debug);
236                         nf_dump_skb(PF_INET, skb);
237                 }
238         }
239 }
240
241 void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
242 {
243         if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
244                                  | (1 << NF_IP_POST_ROUTING))) {
245                 printk("ip_dev_loopback_xmit: bad owned skb = %p: ", 
246                        newskb);
247                 debug_print_hooks_ip(newskb->nf_debug);
248                 nf_dump_skb(PF_INET, newskb);
249         }
250         /* Clear to avoid confusing input check */
251         newskb->nf_debug = 0;
252 }
253
254 void nf_debug_ip_finish_output2(struct sk_buff *skb)
255 {
256         /* If it's owned, it must have gone through the
257          * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
258          * Otherwise, must have gone through
259          * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
260          */
261         if (skb->sk) {
262                 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
263                                       | (1 << NF_IP_POST_ROUTING))) {
264                         printk("ip_finish_output: bad owned skb = %p: ", skb);
265                         debug_print_hooks_ip(skb->nf_debug);
266                         nf_dump_skb(PF_INET, skb);
267                 }
268         } else {
269                 if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
270                                       | (1 << NF_IP_FORWARD)
271                                       | (1 << NF_IP_POST_ROUTING))) {
272                         /* Fragments, entunnelled packets, TCP RSTs
273                            generated by ipt_REJECT will have no
274                            owners, but still may be local */
275                         if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
276                                               | (1 << NF_IP_POST_ROUTING))){
277                                 printk("ip_finish_output:"
278                                        " bad unowned skb = %p: ",skb);
279                                 debug_print_hooks_ip(skb->nf_debug);
280                                 nf_dump_skb(PF_INET, skb);
281                         }
282                 }
283         }
284 }
285 #endif /*CONFIG_NETFILTER_DEBUG*/
286
287 /* Call get/setsockopt() */
288 static int nf_sockopt(struct sock *sk, int pf, int val, 
289                       char *opt, int *len, int get)
290 {
291         struct list_head *i;
292         struct nf_sockopt_ops *ops;
293         int ret;
294
295         if (down_interruptible(&nf_sockopt_mutex) != 0)
296                 return -EINTR;
297
298         list_for_each(i, &nf_sockopts) {
299                 ops = (struct nf_sockopt_ops *)i;
300                 if (ops->pf == pf) {
301                         if (get) {
302                                 if (val >= ops->get_optmin
303                                     && val < ops->get_optmax) {
304                                         ops->use++;
305                                         up(&nf_sockopt_mutex);
306                                         ret = ops->get(sk, val, opt, len);
307                                         goto out;
308                                 }
309                         } else {
310                                 if (val >= ops->set_optmin
311                                     && val < ops->set_optmax) {
312                                         ops->use++;
313                                         up(&nf_sockopt_mutex);
314                                         ret = ops->set(sk, val, opt, *len);
315                                         goto out;
316                                 }
317                         }
318                 }
319         }
320         up(&nf_sockopt_mutex);
321         return -ENOPROTOOPT;
322         
323  out:
324         down(&nf_sockopt_mutex);
325         ops->use--;
326         if (ops->cleanup_task)
327                 wake_up_process(ops->cleanup_task);
328         up(&nf_sockopt_mutex);
329         return ret;
330 }
331
/* setsockopt() entry point: hands @val to the matching registered set
 * handler via nf_sockopt().  Returns whatever nf_sockopt() returns. */
int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
		  int len)
{
	int optlen = len;

	return nf_sockopt(sk, pf, val, opt, &optlen, 0);
}
337
/* getsockopt() entry point: hands @val to the matching registered get
 * handler via nf_sockopt().  Returns whatever nf_sockopt() returns. */
int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
{
	const int is_get = 1;

	return nf_sockopt(sk, pf, val, opt, len, is_get);
}
342
343 static unsigned int nf_iterate(struct list_head *head,
344                                struct sk_buff **skb,
345                                int hook,
346                                const struct net_device *indev,
347                                const struct net_device *outdev,
348                                struct list_head **i,
349                                int (*okfn)(struct sk_buff *),
350                                int hook_thresh)
351 {
352         /*
353          * The caller must not block between calls to this
354          * function because of risk of continuing from deleted element.
355          */
356         list_for_each_continue_rcu(*i, head) {
357                 struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
358
359                 if (hook_thresh > elem->priority)
360                         continue;
361
362                 /* Optimization: we don't need to hold module
363                    reference here, since function can't sleep. --RR */
364                 switch (elem->hook(hook, skb, indev, outdev, okfn)) {
365                 case NF_QUEUE:
366                         return NF_QUEUE;
367
368                 case NF_STOLEN:
369                         return NF_STOLEN;
370
371                 case NF_DROP:
372                         return NF_DROP;
373
374                 case NF_REPEAT:
375                         *i = (*i)->prev;
376                         break;
377
378 #ifdef CONFIG_NETFILTER_DEBUG
379                 case NF_ACCEPT:
380                         break;
381
382                 default:
383                         NFDEBUG("Evil return from %p(%u).\n", 
384                                 elem->hook, hook);
385 #endif
386                 }
387         }
388         return NF_ACCEPT;
389 }
390
391 int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
392 {      
393         int ret;
394
395         write_lock_bh(&queue_handler_lock);
396         if (queue_handler[pf].outfn)
397                 ret = -EBUSY;
398         else {
399                 queue_handler[pf].outfn = outfn;
400                 queue_handler[pf].data = data;
401                 ret = 0;
402         }
403         write_unlock_bh(&queue_handler_lock);
404
405         return ret;
406 }
407
408 /* The caller must flush their queue before this */
409 int nf_unregister_queue_handler(int pf)
410 {
411         write_lock_bh(&queue_handler_lock);
412         queue_handler[pf].outfn = NULL;
413         queue_handler[pf].data = NULL;
414         write_unlock_bh(&queue_handler_lock);
415         
416         return 0;
417 }
418
419 /* 
420  * Any packet that leaves via this function must come back 
421  * through nf_reinject().
422  */
423 static int nf_queue(struct sk_buff *skb, 
424                     struct list_head *elem, 
425                     int pf, unsigned int hook,
426                     struct net_device *indev,
427                     struct net_device *outdev,
428                     int (*okfn)(struct sk_buff *))
429 {
430         int status;
431         struct nf_info *info;
432 #ifdef CONFIG_BRIDGE_NETFILTER
433         struct net_device *physindev = NULL;
434         struct net_device *physoutdev = NULL;
435 #endif
436
437         /* QUEUE == DROP if noone is waiting, to be safe. */
438         read_lock(&queue_handler_lock);
439         if (!queue_handler[pf].outfn) {
440                 read_unlock(&queue_handler_lock);
441                 kfree_skb(skb);
442                 return 1;
443         }
444
445         info = kmalloc(sizeof(*info), GFP_ATOMIC);
446         if (!info) {
447                 if (net_ratelimit())
448                         printk(KERN_ERR "OOM queueing packet %p\n",
449                                skb);
450                 read_unlock(&queue_handler_lock);
451                 kfree_skb(skb);
452                 return 1;
453         }
454
455         *info = (struct nf_info) { 
456                 (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
457
458         /* If it's going away, ignore hook. */
459         if (!try_module_get(info->elem->owner)) {
460                 read_unlock(&queue_handler_lock);
461                 kfree(info);
462                 return 0;
463         }
464
465         /* Bump dev refs so they don't vanish while packet is out */
466         if (indev) dev_hold(indev);
467         if (outdev) dev_hold(outdev);
468
469 #ifdef CONFIG_BRIDGE_NETFILTER
470         if (skb->nf_bridge) {
471                 physindev = skb->nf_bridge->physindev;
472                 if (physindev) dev_hold(physindev);
473                 physoutdev = skb->nf_bridge->physoutdev;
474                 if (physoutdev) dev_hold(physoutdev);
475         }
476 #endif
477
478         status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
479         read_unlock(&queue_handler_lock);
480
481         if (status < 0) {
482                 /* James M doesn't say fuck enough. */
483                 if (indev) dev_put(indev);
484                 if (outdev) dev_put(outdev);
485 #ifdef CONFIG_BRIDGE_NETFILTER
486                 if (physindev) dev_put(physindev);
487                 if (physoutdev) dev_put(physoutdev);
488 #endif
489                 module_put(info->elem->owner);
490                 kfree(info);
491                 kfree_skb(skb);
492                 return 1;
493         }
494         return 1;
495 }
496
497 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
498                  struct net_device *indev,
499                  struct net_device *outdev,
500                  int (*okfn)(struct sk_buff *),
501                  int hook_thresh)
502 {
503         struct list_head *elem;
504         unsigned int verdict;
505         int ret = 0;
506
507         if (skb->ip_summed == CHECKSUM_HW) {
508                 if (outdev == NULL) {
509                         skb->ip_summed = CHECKSUM_NONE;
510                 } else {
511                         skb_checksum_help(skb);
512                 }
513         }
514
515         /* We may already have this, but read-locks nest anyway */
516         rcu_read_lock();
517
518 #ifdef CONFIG_NETFILTER_DEBUG
519         if (skb->nf_debug & (1 << hook)) {
520                 printk("nf_hook: hook %i already set.\n", hook);
521                 nf_dump_skb(pf, skb);
522         }
523         skb->nf_debug |= (1 << hook);
524 #endif
525
526         elem = &nf_hooks[pf][hook];
527  next_hook:
528         verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
529                              outdev, &elem, okfn, hook_thresh);
530         if (verdict == NF_QUEUE) {
531                 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
532                 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn))
533                         goto next_hook;
534         }
535
536         switch (verdict) {
537         case NF_ACCEPT:
538                 ret = okfn(skb);
539                 break;
540
541         case NF_DROP:
542                 kfree_skb(skb);
543                 ret = -EPERM;
544                 break;
545         }
546
547         rcu_read_unlock();
548         return ret;
549 }
550
551 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
552                  unsigned int verdict)
553 {
554         struct list_head *elem = &info->elem->list;
555         struct list_head *i;
556
557         rcu_read_lock();
558
559         /* Release those devices we held, or Alexey will kill me. */
560         if (info->indev) dev_put(info->indev);
561         if (info->outdev) dev_put(info->outdev);
562 #ifdef CONFIG_BRIDGE_NETFILTER
563         if (skb->nf_bridge) {
564                 if (skb->nf_bridge->physindev)
565                         dev_put(skb->nf_bridge->physindev);
566                 if (skb->nf_bridge->physoutdev)
567                         dev_put(skb->nf_bridge->physoutdev);
568         }
569 #endif
570
571         /* Drop reference to owner of hook which queued us. */
572         module_put(info->elem->owner);
573
574         list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
575                 if (i == elem) 
576                         break;
577         }
578   
579         if (elem == &nf_hooks[info->pf][info->hook]) {
580                 /* The module which sent it to userspace is gone. */
581                 NFDEBUG("%s: module disappeared, dropping packet.\n",
582                         __FUNCTION__);
583                 verdict = NF_DROP;
584         }
585
586         /* Continue traversal iff userspace said ok... */
587         if (verdict == NF_REPEAT) {
588                 elem = elem->prev;
589                 verdict = NF_ACCEPT;
590         }
591
592         if (verdict == NF_ACCEPT) {
593         next_hook:
594                 verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
595                                      &skb, info->hook, 
596                                      info->indev, info->outdev, &elem,
597                                      info->okfn, INT_MIN);
598         }
599
600         switch (verdict) {
601         case NF_ACCEPT:
602                 info->okfn(skb);
603                 break;
604
605         case NF_QUEUE:
606                 if (!nf_queue(skb, elem, info->pf, info->hook, 
607                               info->indev, info->outdev, info->okfn))
608                         goto next_hook;
609                 break;
610         }
611         rcu_read_unlock();
612
613         if (verdict == NF_DROP)
614                 kfree_skb(skb);
615
616         kfree(info);
617         return;
618 }
619
620 #ifdef CONFIG_INET
621 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
622 int ip_route_me_harder(struct sk_buff **pskb)
623 {
624         struct iphdr *iph = (*pskb)->nh.iph;
625         struct rtable *rt;
626         struct flowi fl = {};
627         struct dst_entry *odst;
628         unsigned int hh_len;
629
630         /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
631          * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
632          */
633         if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
634                 fl.nl_u.ip4_u.daddr = iph->daddr;
635                 fl.nl_u.ip4_u.saddr = iph->saddr;
636                 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
637                 fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
638 #ifdef CONFIG_IP_ROUTE_FWMARK
639                 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
640 #endif
641                 fl.proto = iph->protocol;
642                 if (ip_route_output_key(&rt, &fl) != 0)
643                         return -1;
644
645                 /* Drop old route. */
646                 dst_release((*pskb)->dst);
647                 (*pskb)->dst = &rt->u.dst;
648         } else {
649                 /* non-local src, find valid iif to satisfy
650                  * rp-filter when calling ip_route_input. */
651                 fl.nl_u.ip4_u.daddr = iph->saddr;
652                 if (ip_route_output_key(&rt, &fl) != 0)
653                         return -1;
654
655                 odst = (*pskb)->dst;
656                 if (ip_route_input(*pskb, iph->daddr, iph->saddr,
657                                    RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
658                         dst_release(&rt->u.dst);
659                         return -1;
660                 }
661                 dst_release(&rt->u.dst);
662                 dst_release(odst);
663         }
664         
665         if ((*pskb)->dst->error)
666                 return -1;
667
668         /* Change in oif may mean change in hh_len. */
669         hh_len = (*pskb)->dst->dev->hard_header_len;
670         if (skb_headroom(*pskb) < hh_len) {
671                 struct sk_buff *nskb;
672
673                 nskb = skb_realloc_headroom(*pskb, hh_len);
674                 if (!nskb) 
675                         return -1;
676                 if ((*pskb)->sk)
677                         skb_set_owner_w(nskb, (*pskb)->sk);
678                 kfree_skb(*pskb);
679                 *pskb = nskb;
680         }
681
682         return 0;
683 }
684
685 int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
686 {
687         struct sk_buff *nskb;
688         unsigned int iplen;
689
690         if (writable_len > (*pskb)->len)
691                 return 0;
692
693         /* Not exclusive use of packet?  Must copy. */
694         if (skb_shared(*pskb) || skb_cloned(*pskb))
695                 goto copy_skb;
696
697         /* Alexey says IP hdr is always modifiable and linear, so ok. */
698         if (writable_len <= (*pskb)->nh.iph->ihl*4)
699                 return 1;
700
701         iplen = writable_len - (*pskb)->nh.iph->ihl*4;
702
703         /* DaveM says protocol headers are also modifiable. */
704         switch ((*pskb)->nh.iph->protocol) {
705         case IPPROTO_TCP: {
706                 struct tcphdr hdr;
707                 if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
708                                   &hdr, sizeof(hdr)) != 0)
709                         goto copy_skb;
710                 if (writable_len <= (*pskb)->nh.iph->ihl*4 + hdr.doff*4)
711                         goto pull_skb;
712                 goto copy_skb;
713         }
714         case IPPROTO_UDP:
715                 if (writable_len<=(*pskb)->nh.iph->ihl*4+sizeof(struct udphdr))
716                         goto pull_skb;
717                 goto copy_skb;
718         case IPPROTO_ICMP:
719                 if (writable_len
720                     <= (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr))
721                         goto pull_skb;
722                 goto copy_skb;
723         /* Insert other cases here as desired */
724         }
725
726 copy_skb:
727         nskb = skb_copy(*pskb, GFP_ATOMIC);
728         if (!nskb)
729                 return 0;
730         BUG_ON(skb_is_nonlinear(nskb));
731
732         /* Rest of kernel will get very unhappy if we pass it a
733            suddenly-orphaned skbuff */
734         if ((*pskb)->sk)
735                 skb_set_owner_w(nskb, (*pskb)->sk);
736         kfree_skb(*pskb);
737         *pskb = nskb;
738         return 1;
739
740 pull_skb:
741         return pskb_may_pull(*pskb, writable_len);
742 }
743 EXPORT_SYMBOL(skb_ip_make_writable);
744 #endif /*CONFIG_INET*/
745
746 /* Internal logging interface, which relies on the real 
747    LOG target modules */
748
749 #define NF_LOG_PREFIXLEN                128
750
751 static nf_logfn *nf_logging[NPROTO]; /* = NULL */
752 static int reported = 0;
753 static spinlock_t nf_log_lock = SPIN_LOCK_UNLOCKED;
754
755 int nf_log_register(int pf, nf_logfn *logfn)
756 {
757         int ret = -EBUSY;
758
759         /* Any setup of logging members must be done before
760          * substituting pointer. */
761         smp_wmb();
762         spin_lock(&nf_log_lock);
763         if (!nf_logging[pf]) {
764                 nf_logging[pf] = logfn;
765                 ret = 0;
766         }
767         spin_unlock(&nf_log_lock);
768         return ret;
769 }               
770
771 void nf_log_unregister(int pf, nf_logfn *logfn)
772 {
773         spin_lock(&nf_log_lock);
774         if (nf_logging[pf] == logfn)
775                 nf_logging[pf] = NULL;
776         spin_unlock(&nf_log_lock);
777
778         /* Give time to concurrent readers. */
779         synchronize_net();
780 }               
781
782 void nf_log_packet(int pf,
783                    unsigned int hooknum,
784                    const struct sk_buff *skb,
785                    const struct net_device *in,
786                    const struct net_device *out,
787                    const char *fmt, ...)
788 {
789         va_list args;
790         char prefix[NF_LOG_PREFIXLEN];
791         nf_logfn *logfn;
792         
793         rcu_read_lock();
794         logfn = nf_logging[pf];
795         if (logfn) {
796                 va_start(args, fmt);
797                 vsnprintf(prefix, sizeof(prefix), fmt, args);
798                 va_end(args);
799                 /* We must read logging before nf_logfn[pf] */
800                 smp_read_barrier_depends();
801                 logfn(hooknum, skb, in, out, prefix);
802         } else if (!reported) {
803                 printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
804                        "no backend logging module loaded in!\n");
805                 reported++;
806         }
807         rcu_read_unlock();
808 }
809 EXPORT_SYMBOL(nf_log_register);
810 EXPORT_SYMBOL(nf_log_unregister);
811 EXPORT_SYMBOL(nf_log_packet);
812
813 /* This does not belong here, but ipt_REJECT needs it if connection
814    tracking is in use: without this, the connection may not be in the hash
815    table, and hence manufactured ICMP or RST packets will not be associated
816    with it. */
817 void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
818
819 void __init netfilter_init(void)
820 {
821         int i, h;
822
823         for (i = 0; i < NPROTO; i++) {
824                 for (h = 0; h < NF_MAX_HOOKS; h++)
825                         INIT_LIST_HEAD(&nf_hooks[i][h]);
826         }
827 }
828
829 EXPORT_SYMBOL(ip_ct_attach);
830 EXPORT_SYMBOL(ip_route_me_harder);
831 EXPORT_SYMBOL(nf_getsockopt);
832 EXPORT_SYMBOL(nf_hook_slow);
833 EXPORT_SYMBOL(nf_hooks);
834 EXPORT_SYMBOL(nf_register_hook);
835 EXPORT_SYMBOL(nf_register_queue_handler);
836 EXPORT_SYMBOL(nf_register_sockopt);
837 EXPORT_SYMBOL(nf_reinject);
838 EXPORT_SYMBOL(nf_setsockopt);
839 EXPORT_SYMBOL(nf_unregister_hook);
840 EXPORT_SYMBOL(nf_unregister_queue_handler);
841 EXPORT_SYMBOL(nf_unregister_sockopt);