1 /* netfilter.c: look after the filters for various protocols.
2 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
4 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any way.
7 * Rusty Russell (C)2000 -- This code is GPL.
9 * February 2000: Modified by James Morris to have 1 queue per protocol.
10 * 15-Mar-2000: Added NF_REPEAT --RR.
11 * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. */
13 #include <linux/config.h>
14 #include <linux/kernel.h>
15 #include <linux/netfilter.h>
16 #include <net/protocol.h>
17 #include <linux/init.h>
18 #include <linux/skbuff.h>
19 #include <linux/wait.h>
20 #include <linux/module.h>
21 #include <linux/interrupt.h>
23 #include <linux/netdevice.h>
24 #include <linux/inetdevice.h>
25 #include <linux/tcp.h>
26 #include <linux/udp.h>
27 #include <linux/icmp.h>
29 #include <net/route.h>
32 /* In this code, we can be waiting indefinitely for userspace to
33 * service a packet if a hook returns NF_QUEUE. We could keep a count
34 * of skbuffs queued for userspace, and not deregister a hook unless
35 * this is zero, but that sucks. Now, we simply check when the
36 * packets come back: if the hook is gone, the packet is discarded. */
/* Debug tracing helper.
 * With CONFIG_NETFILTER_DEBUG defined, NFDEBUG() forwards its arguments to
 * printk(); otherwise it expands to nothing, so its arguments are not even
 * evaluated.  The two definitions must sit in separate branches of the
 * conditional, and the conditional must be closed. */
#ifdef CONFIG_NETFILTER_DEBUG
#define NFDEBUG(format, args...)  printk(format , ## args)
#else
#define NFDEBUG(format, args...)
#endif
43 /* Sockopts only registered and called from user context, so
44 net locking would be overkill. Also, [gs]etsockopt calls may sleep. */
/* Mutex taken by the sockopt registration, unregistration and dispatch
 * paths in this file. */
46 static DECLARE_MUTEX(nf_sockopt_mutex);
/* One hook list per (protocol family, hook number) pair.  Entries are
 * linked in by nf_register_hook() and walked via nf_iterate(). */
48 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
/* List of registered nf_sockopt_ops; walked and modified with
 * nf_sockopt_mutex held. */
49 static LIST_HEAD(nf_sockopts);
/* Spinlock held (with BHs disabled) around insertions into and removals
 * from the nf_hooks lists. */
50 static spinlock_t nf_hook_lock = SPIN_LOCK_UNLOCKED;
53 /* A queue handler may be registered for each protocol. Each is protected by
54 * a long term mutex. The handler must provide an outfn() to accept packets
55 * for queueing and must reinject all packets it receives, no matter what. */
/* Per-protocol queue handler slot.  outfn is the callback nf_queue()
 * invokes to hand a packet over; a slot with outfn == NULL has no handler.
 * NOTE(review): a `data` member is assigned elsewhere in this file
 * (queue_handler[pf].data) but its declaration is not visible in this
 * listing -- confirm against the complete source. */
57 static struct nf_queue_handler_t {
58 nf_queue_outfn_t outfn;
60 } queue_handler[NPROTO];
/* Reader/writer lock for queue_handler[]: write-locked by the register and
 * unregister functions, read-locked by nf_queue(). */
61 static rwlock_t queue_handler_lock = RW_LOCK_UNLOCKED;
/* Register a hook on the list for (reg->pf, reg->hooknum).
 * With nf_hook_lock held (BHs disabled) the list is scanned, comparing
 * reg->priority against each existing entry, and reg is then linked in
 * with list_add_rcu() immediately before the position where the scan
 * stopped.
 * NOTE(review): the declaration of `i`, the loop body and the return
 * statement are not visible in this listing; presumably the scan breaks
 * at the first entry with a higher priority -- confirm against the
 * complete source. */
63 int nf_register_hook(struct nf_hook_ops *reg)
67 spin_lock_bh(&nf_hook_lock);
68 list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
69 if (reg->priority < ((struct nf_hook_ops *)i)->priority)
72 list_add_rcu(&reg->list, i->prev);
73 spin_unlock_bh(&nf_hook_lock);
/* Remove a previously registered hook.
 * reg is unlinked from its hook list with list_del_rcu() while
 * nf_hook_lock is held with BHs disabled.
 * NOTE(review): anything that follows the unlock (e.g. waiting for
 * concurrent RCU readers) is not visible in this listing. */
79 void nf_unregister_hook(struct nf_hook_ops *reg)
81 spin_lock_bh(&nf_hook_lock);
82 list_del_rcu(&reg->list);
83 spin_unlock_bh(&nf_hook_lock);
/* Do two ranges with exclusive upper bounds overlap?
 * Returns non-zero when [min1, max1) and [min2, max2) share at least one
 * value, zero otherwise.  Ranges that merely touch (max1 == min2) do not
 * overlap. */
static inline int overlap(int min1, int max1, int min2, int max2)
{
	return max1 > min2 && min1 < max2;
}
94 /* Functions to register sockopt ranges (exclusive). */
/* Register a set of get/set sockopt ranges.
 * nf_sockopt_mutex is taken interruptibly.  Every existing entry with the
 * same pf is checked: if either its set range or its get range overlaps
 * the corresponding range in reg, the clash is reported through NFDEBUG.
 * reg is added to nf_sockopts and the mutex is released.
 * NOTE(review): the declarations, the action taken after a clash is
 * reported (presumably the registration fails) and the return statements
 * are not visible in this listing. */
95 int nf_register_sockopt(struct nf_sockopt_ops *reg)
100 if (down_interruptible(&nf_sockopt_mutex) != 0)
103 list_for_each(i, &nf_sockopts) {
104 struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
105 if (ops->pf == reg->pf
106 && (overlap(ops->set_optmin, ops->set_optmax,
107 reg->set_optmin, reg->set_optmax)
108 || overlap(ops->get_optmin, ops->get_optmax,
109 reg->get_optmin, reg->get_optmax))) {
110 NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
111 ops->set_optmin, ops->set_optmax,
112 ops->get_optmin, ops->get_optmax,
113 reg->set_optmin, reg->set_optmax,
114 reg->get_optmin, reg->get_optmax);
120 list_add(&reg->list, &nf_sockopts);
122 up(&nf_sockopt_mutex);
/* Unregister a set of sockopt ranges.
 * Takes nf_sockopt_mutex (uninterruptibly).  The visible wait path marks
 * the current task TASK_UNINTERRUPTIBLE, records it in reg->cleanup_task
 * so that nf_sockopt() can wake it, and drops the mutex.  On the removal
 * path reg is unlinked from nf_sockopts and the mutex is released.
 * NOTE(review): the condition selecting the wait path and the code that
 * sleeps and retries are not visible in this listing. */
126 void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
128 /* No point being interruptible: we're probably in cleanup_module() */
130 down(&nf_sockopt_mutex);
132 /* To be woken by nf_sockopt call... */
133 /* FIXME: Stuart Young's name appears gratuitously. */
134 set_current_state(TASK_UNINTERRUPTIBLE);
135 reg->cleanup_task = current;
136 up(&nf_sockopt_mutex);
140 list_del(&reg->list);
141 up(&nf_sockopt_mutex);
144 #ifdef CONFIG_NETFILTER_DEBUG
147 #include <linux/netfilter_ipv4.h>
/* Print the IPv4 hooks recorded in the nf_debug bitmask.
 * Bit (1 << hook) is tested for each of PRE_ROUTING, LOCAL_IN, FORWARD,
 * LOCAL_OUT and POST_ROUTING; a set bit is cleared again with XOR once it
 * has been handled, so whatever remains afterwards is reported as
 * "Crap bits".
 * NOTE(review): the printk() calls for LOCAL_IN and FORWARD and the
 * condition guarding the final printk() are not visible in this listing. */
149 static void debug_print_hooks_ip(unsigned int nf_debug)
151 if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
152 printk("PRE_ROUTING ");
153 nf_debug ^= (1 << NF_IP_PRE_ROUTING);
155 if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
157 nf_debug ^= (1 << NF_IP_LOCAL_IN);
159 if (nf_debug & (1 << NF_IP_FORWARD)) {
161 nf_debug ^= (1 << NF_IP_FORWARD);
163 if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
164 printk("LOCAL_OUT ");
165 nf_debug ^= (1 << NF_IP_LOCAL_OUT);
167 if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
168 printk("POST_ROUTING ");
169 nf_debug ^= (1 << NF_IP_POST_ROUTING);
172 printk("Crap bits: 0x%04X", nf_debug);
/* Dump a description of skb to the kernel log.
 * First prints pf, whether the skb has an owning socket, the device name
 * (or "(no dev)") and a length.  The IPv4 header is then decoded:
 * protocol, source/destination address and port (ports are read only for
 * TCP and UDP, otherwise they stay 0), total length, TOS, id, fragment
 * field and TTL, followed by one "O=" word per IP option word and the
 * nfmark in decimal and in hex.
 * The hex rendering of nfmark uses %lx; the earlier "0x%lu" printed a
 * decimal number behind a hex prefix.
 * NOTE(review): the condition selecting the IPv4 decode (presumably
 * pf == PF_INET) and the declaration of `opti` are not visible in this
 * listing. */
176 void nf_dump_skb(int pf, struct sk_buff *skb)
178 printk("skb: pf=%i %s dev=%s len=%u\n",
180 skb->sk ? "(owned)" : "(unowned)",
181 skb->dev ? skb->dev->name : "(no dev)",
185 const struct iphdr *ip = skb->nh.iph;
186 __u32 *opt = (__u32 *) (ip + 1);
188 __u16 src_port = 0, dst_port = 0;
190 if (ip->protocol == IPPROTO_TCP
191 || ip->protocol == IPPROTO_UDP) {
192 struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
193 src_port = ntohs(tcp->source);
194 dst_port = ntohs(tcp->dest);
197 printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
198 " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
199 ip->protocol, NIPQUAD(ip->saddr),
200 src_port, NIPQUAD(ip->daddr),
202 ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
203 ntohs(ip->frag_off), ip->ttl);
205 for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
206 printk(" O=0x%8.8X", *opt++);
207 printk(" MARK=%lu (0x%lx)",
208 (long unsigned int)skb->nfmark,
209 (long unsigned int)skb->nfmark);
/* Debug check of the hooks recorded in skb->nf_debug for a packet being
 * delivered locally.
 * A packet on the device named "lo" must carry exactly the LOCAL_OUT,
 * POST_ROUTING, PRE_ROUTING and LOCAL_IN bits; any other packet must carry
 * exactly PRE_ROUTING and LOCAL_IN.  On a mismatch the recorded hooks and
 * the skb are dumped.  A message is also printed for an skb without a
 * device (the test itself is not visible in this listing). */
215 void nf_debug_ip_local_deliver(struct sk_buff *skb)
217 /* If it's a loopback packet, it must have come through
218 * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
219 * NF_IP_LOCAL_IN. Otherwise, must have gone through
220 * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
222 printk("ip_local_deliver: skb->dev is NULL.\n");
224 else if (strcmp(skb->dev->name, "lo") == 0) {
225 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
226 | (1 << NF_IP_POST_ROUTING)
227 | (1 << NF_IP_PRE_ROUTING)
228 | (1 << NF_IP_LOCAL_IN))) {
229 printk("ip_local_deliver: bad loopback skb: ");
230 debug_print_hooks_ip(skb->nf_debug);
231 nf_dump_skb(PF_INET, skb);
235 if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
236 | (1<<NF_IP_LOCAL_IN))) {
237 printk("ip_local_deliver: bad non-lo skb: ");
238 debug_print_hooks_ip(skb->nf_debug);
239 nf_dump_skb(PF_INET, skb);
/* Debug check for a packet looped back on transmit.
 * newskb->nf_debug must hold exactly the LOCAL_OUT and POST_ROUTING bits;
 * otherwise the recorded hooks and the skb are dumped.  The field is then
 * reset to 0 so the input-side check starts from a clean state. */
244 void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
246 if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
247 | (1 << NF_IP_POST_ROUTING))) {
248 printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
250 debug_print_hooks_ip(newskb->nf_debug);
251 nf_dump_skb(PF_INET, newskb);
253 /* Clear to avoid confusing input check */
254 newskb->nf_debug = 0;
/* Debug check of the hooks recorded in skb->nf_debug on the output path.
 * The "owned" case expects exactly LOCAL_OUT | POST_ROUTING.  The other
 * case expects PRE_ROUTING | FORWARD | POST_ROUTING, but also tolerates
 * LOCAL_OUT | POST_ROUTING.  Any other combination is reported by dumping
 * the recorded hooks and the skb.
 * NOTE(review): the test that distinguishes owned from unowned skbs is
 * not visible in this listing. */
257 void nf_debug_ip_finish_output2(struct sk_buff *skb)
259 /* If it's owned, it must have gone through the
260 * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
261 * Otherwise, must have gone through
262 * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
265 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
266 | (1 << NF_IP_POST_ROUTING))) {
267 printk("ip_finish_output: bad owned skb = %p: ", skb);
268 debug_print_hooks_ip(skb->nf_debug);
269 nf_dump_skb(PF_INET, skb);
272 if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
273 | (1 << NF_IP_FORWARD)
274 | (1 << NF_IP_POST_ROUTING))) {
275 /* Fragments, entunnelled packets, TCP RSTs
276 generated by ipt_REJECT will have no
277 owners, but still may be local */
278 if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
279 | (1 << NF_IP_POST_ROUTING))){
280 printk("ip_finish_output:"
281 " bad unowned skb = %p: ",skb);
282 debug_print_hooks_ip(skb->nf_debug);
283 nf_dump_skb(PF_INET, skb);
288 #endif /*CONFIG_NETFILTER_DEBUG*/
290 /* Call get/setsockopt() */
/* Common dispatcher behind nf_getsockopt() and nf_setsockopt().
 * Walks nf_sockopts under nf_sockopt_mutex (taken interruptibly) looking
 * for an entry whose range contains val (optmin <= val < optmax).  The
 * mutex is released before the handler runs: ops->get() receives the len
 * pointer, ops->set() receives *len.  Afterwards the mutex is retaken and
 * any task recorded in ops->cleanup_task is woken; this is the
 * counterpart of the wait in nf_unregister_sockopt().
 * NOTE(review): the use of `get` and `pf` to select an entry, the
 * not-found result and the return statements are not visible in this
 * listing. */
291 static int nf_sockopt(struct sock *sk, int pf, int val,
292 char __user *opt, int *len, int get)
295 struct nf_sockopt_ops *ops;
298 if (down_interruptible(&nf_sockopt_mutex) != 0)
301 list_for_each(i, &nf_sockopts) {
302 ops = (struct nf_sockopt_ops *)i;
305 if (val >= ops->get_optmin
306 && val < ops->get_optmax) {
308 up(&nf_sockopt_mutex);
309 ret = ops->get(sk, val, opt, len);
313 if (val >= ops->set_optmin
314 && val < ops->set_optmax) {
316 up(&nf_sockopt_mutex);
317 ret = ops->set(sk, val, opt, *len);
323 up(&nf_sockopt_mutex);
327 down(&nf_sockopt_mutex);
329 if (ops->cleanup_task)
330 wake_up_process(ops->cleanup_task);
331 up(&nf_sockopt_mutex);
/* setsockopt entry point: forwards to nf_sockopt() with get == 0, passing
 * the address of len.
 * NOTE(review): the end of the parameter list (presumably `int len`) is
 * not visible in this listing. */
335 int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt,
338 return nf_sockopt(sk, pf, val, opt, &len, 0);
/* getsockopt entry point: forwards to nf_sockopt() with get == 1 and
 * returns its result.  len is passed through as a pointer. */
341 int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len)
343 return nf_sockopt(sk, pf, val, opt, len, 1);
/* Walk the hook list `head`, continuing after the element *i, and call
 * each hook function with (hook, skb, indev, outdev, okfn).
 * *i is advanced as the RCU traversal proceeds, so the caller can see
 * where the walk stopped.  Each element's priority is compared against
 * hook_thresh, and the hook's return value is dispatched in a switch.
 * NOTE(review): the remaining parameters (`hook`, `hook_thresh`), the
 * action taken for low-priority elements (presumably they are skipped),
 * the switch cases and the return value are not visible in this listing. */
346 static unsigned int nf_iterate(struct list_head *head,
347 struct sk_buff **skb,
349 const struct net_device *indev,
350 const struct net_device *outdev,
351 struct list_head **i,
352 int (*okfn)(struct sk_buff *),
356 * The caller must not block between calls to this
357 * function because of risk of continuing from deleted element.
359 list_for_each_continue_rcu(*i, head) {
360 struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
362 if (hook_thresh > elem->priority)
365 /* Optimization: we don't need to hold module
366 reference here, since function can't sleep. --RR */
367 switch (elem->hook(hook, skb, indev, outdev, okfn)) {
381 #ifdef CONFIG_NETFILTER_DEBUG
386 NFDEBUG("Evil return from %p(%u).\n",
/* Install outfn (with its opaque data pointer) as the queue handler for
 * protocol family pf.  The slot is examined and updated with
 * queue_handler_lock write-locked and BHs disabled.
 * NOTE(review): what happens when a handler is already installed
 * (presumably an error is returned) and the return statement are not
 * visible in this listing. */
394 int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
398 write_lock_bh(&queue_handler_lock);
399 if (queue_handler[pf].outfn)
402 queue_handler[pf].outfn = outfn;
403 queue_handler[pf].data = data;
406 write_unlock_bh(&queue_handler_lock);
411 /* The caller must flush their queue before this */
/* Clear the queue handler slot for protocol family pf: both outfn and
 * data are reset to NULL with queue_handler_lock write-locked and BHs
 * disabled.
 * NOTE(review): the return statement is not visible in this listing. */
412 int nf_unregister_queue_handler(int pf)
414 write_lock_bh(&queue_handler_lock);
415 queue_handler[pf].outfn = NULL;
416 queue_handler[pf].data = NULL;
417 write_unlock_bh(&queue_handler_lock);
423 /* Any packet that leaves via this function must come back
424 * through nf_reinject(). */
/* Hand skb to the queue handler registered for pf.
 * Runs with queue_handler_lock read-locked.  If no handler is registered
 * the lock is dropped at once.  Otherwise an nf_info record is allocated
 * with GFP_ATOMIC and filled with the hook element, pf, hook, devices and
 * okfn.  A reference is taken on the hook owner's module, on indev and
 * outdev, and (with CONFIG_BRIDGE_NETFILTER) on the bridge physical
 * devices, before the handler's outfn() is called.  The tail of the
 * function drops all of those references again.
 * NOTE(review): the return values, the freeing of skb/info on the early
 * exits and the condition guarding the final release are not visible in
 * this listing. */
426 static int nf_queue(struct sk_buff *skb,
427 struct list_head *elem,
428 int pf, unsigned int hook,
429 struct net_device *indev,
430 struct net_device *outdev,
431 int (*okfn)(struct sk_buff *))
434 struct nf_info *info;
435 #ifdef CONFIG_BRIDGE_NETFILTER
436 struct net_device *physindev = NULL;
437 struct net_device *physoutdev = NULL;
440 /* QUEUE == DROP if no one is waiting, to be safe. */
441 read_lock(&queue_handler_lock);
442 if (!queue_handler[pf].outfn) {
443 read_unlock(&queue_handler_lock);
448 info = kmalloc(sizeof(*info), GFP_ATOMIC);
451 printk(KERN_ERR "OOM queueing packet %p\n",
453 read_unlock(&queue_handler_lock);
458 *info = (struct nf_info) {
459 (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
461 /* If it's going away, ignore hook. */
462 if (!try_module_get(info->elem->owner)) {
463 read_unlock(&queue_handler_lock);
468 /* Bump dev refs so they don't vanish while packet is out */
469 if (indev) dev_hold(indev);
470 if (outdev) dev_hold(outdev);
472 #ifdef CONFIG_BRIDGE_NETFILTER
473 if (skb->nf_bridge) {
474 physindev = skb->nf_bridge->physindev;
475 if (physindev) dev_hold(physindev);
476 physoutdev = skb->nf_bridge->physoutdev;
477 if (physoutdev) dev_hold(physoutdev);
481 status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
482 read_unlock(&queue_handler_lock);
485 /* Undo the device and module references taken above (presumably only
   when outfn() reported failure -- the condition is not visible here). */
486 if (indev) dev_put(indev);
487 if (outdev) dev_put(outdev);
488 #ifdef CONFIG_BRIDGE_NETFILTER
489 if (physindev) dev_put(physindev);
490 if (physoutdev) dev_put(physoutdev);
492 module_put(info->elem->owner);
/* Run the hooks registered for (pf, hook) over skb.
 * The traversal starts at the head of nf_hooks[pf][hook] and is driven by
 * nf_iterate(), which is given hook_thresh.  When the verdict is NF_QUEUE
 * the packet is passed to nf_queue() together with the element that
 * produced it.  With CONFIG_NETFILTER_DEBUG the bit (1 << hook) is
 * recorded in skb->nf_debug, and a message plus an skb dump is emitted if
 * it was already set.
 * NOTE(review): the final parameter (presumably hook_thresh), the
 * locking, the handling of the other verdicts and the return value are
 * not visible in this listing. */
500 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
501 struct net_device *indev,
502 struct net_device *outdev,
503 int (*okfn)(struct sk_buff *),
506 struct list_head *elem;
507 unsigned int verdict;
510 /* We may already have this, but read-locks nest anyway */
513 #ifdef CONFIG_NETFILTER_DEBUG
514 if (skb->nf_debug & (1 << hook)) {
515 printk("nf_hook: hook %i already set.\n", hook);
516 nf_dump_skb(pf, skb);
518 skb->nf_debug |= (1 << hook);
521 elem = &nf_hooks[pf][hook];
523 verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
524 outdev, &elem, okfn, hook_thresh);
525 if (verdict == NF_QUEUE) {
526 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
527 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn))
/* Resume processing of a packet that was queued through nf_queue().
 * The device references recorded in info (and, with
 * CONFIG_BRIDGE_NETFILTER, those on the bridge physical devices) and the
 * module reference on the hook owner are dropped first.  The hook list
 * for (info->pf, info->hook) is then searched; if elem ends up equal to
 * the list head, the hook that queued the packet has gone and the packet
 * is treated as lost (NFDEBUG message).  For NF_ACCEPT the traversal
 * continues from elem via nf_iterate() with threshold INT_MIN, and the
 * new verdict may queue the packet again.  NF_REPEAT and NF_DROP are also
 * tested.
 * NOTE(review): the loop body, the actions taken for NF_REPEAT and
 * NF_DROP, the switch over the final verdict and the freeing of info are
 * not visible in this listing. */
546 void nf_reinject(struct sk_buff *skb, struct nf_info *info,
547 unsigned int verdict)
549 struct list_head *elem = &info->elem->list;
554 /* Release those devices we held, or Alexey will kill me. */
555 if (info->indev) dev_put(info->indev);
556 if (info->outdev) dev_put(info->outdev);
557 #ifdef CONFIG_BRIDGE_NETFILTER
558 if (skb->nf_bridge) {
559 if (skb->nf_bridge->physindev)
560 dev_put(skb->nf_bridge->physindev);
561 if (skb->nf_bridge->physoutdev)
562 dev_put(skb->nf_bridge->physoutdev);
566 /* Drop reference to owner of hook which queued us. */
567 module_put(info->elem->owner);
569 list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
574 if (elem == &nf_hooks[info->pf][info->hook]) {
575 /* The module which sent it to userspace is gone. */
576 NFDEBUG("%s: module disappeared, dropping packet.\n",
581 /* Continue traversal iff userspace said ok... */
582 if (verdict == NF_REPEAT) {
587 if (verdict == NF_ACCEPT) {
589 verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
591 info->indev, info->outdev, &elem,
592 info->okfn, INT_MIN);
601 if (!nf_queue(skb, elem, info->pf, info->hook,
602 info->indev, info->outdev, info->okfn))
608 if (verdict == NF_DROP)
616 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
/* Recompute the route attached to *pskb from its current IP header.
 * Local source address: an output route lookup is keyed on daddr, saddr,
 * TOS, the socket's bound device (if any), optionally the fwmark, and the
 * protocol; the old dst is released and replaced by the result.
 * Non-local source address: an output lookup towards saddr is used only
 * to find a device to present as the input interface to ip_route_input(),
 * and that temporary route is released afterwards.
 * The resulting dst is checked for an error, and if the new device needs
 * more link-layer header room than the skb has, the headroom is
 * reallocated and socket ownership is carried over to the new skb.
 * NOTE(review): the declarations of `rt` and `hh_len`, the error returns
 * and the replacement of *pskb by the reallocated skb are not visible in
 * this listing. */
617 int ip_route_me_harder(struct sk_buff **pskb)
619 struct iphdr *iph = (*pskb)->nh.iph;
621 struct flowi fl = {};
622 struct dst_entry *odst;
625 /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
626 * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
628 if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
629 fl.nl_u.ip4_u.daddr = iph->daddr;
630 fl.nl_u.ip4_u.saddr = iph->saddr;
631 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
632 fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
633 #ifdef CONFIG_IP_ROUTE_FWMARK
634 fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
636 fl.proto = iph->protocol;
637 if (ip_route_output_key(&rt, &fl) != 0)
640 /* Drop old route. */
641 dst_release((*pskb)->dst);
642 (*pskb)->dst = &rt->u.dst;
644 /* non-local src, find valid iif to satisfy
645 * rp-filter when calling ip_route_input. */
646 fl.nl_u.ip4_u.daddr = iph->saddr;
647 if (ip_route_output_key(&rt, &fl) != 0)
651 if (ip_route_input(*pskb, iph->daddr, iph->saddr,
652 RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
653 dst_release(&rt->u.dst);
656 dst_release(&rt->u.dst);
660 if ((*pskb)->dst->error)
663 /* Change in oif may mean change in hh_len. */
664 hh_len = (*pskb)->dst->dev->hard_header_len;
665 if (skb_headroom(*pskb) < hh_len) {
666 struct sk_buff *nskb;
668 nskb = skb_realloc_headroom(*pskb, hh_len);
672 skb_set_owner_w(nskb, (*pskb)->sk);
/* Ensure the first writable_len bytes of *pskb may be modified.
 * writable_len is first compared against the packet length.  A shared or
 * cloned skb takes the copy path.  Otherwise the request is checked
 * against the IP header length (ihl * 4) and, per protocol, against the
 * IP header plus the transport header (for TCP the data offset is read
 * with skb_copy_bits(); UDP and ICMP use their fixed header sizes).  The
 * copy path duplicates the skb with skb_copy(GFP_ATOMIC), asserts the
 * copy is linear and transfers socket ownership to it.  The final return
 * is the result of pskb_may_pull(*pskb, writable_len).
 * NOTE(review): the case labels, the declaration of `hdr`/`iplen`, the
 * jump targets and the early return values are not visible in this
 * listing. */
680 int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
682 struct sk_buff *nskb;
685 if (writable_len > (*pskb)->len)
688 /* Not exclusive use of packet? Must copy. */
689 if (skb_shared(*pskb) || skb_cloned(*pskb))
692 /* Alexey says IP hdr is always modifiable and linear, so ok. */
693 if (writable_len <= (*pskb)->nh.iph->ihl*4)
696 iplen = writable_len - (*pskb)->nh.iph->ihl*4;
698 /* DaveM says protocol headers are also modifiable. */
699 switch ((*pskb)->nh.iph->protocol) {
702 if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
703 &hdr, sizeof(hdr)) != 0)
705 if (writable_len <= (*pskb)->nh.iph->ihl*4 + hdr.doff*4)
710 if (writable_len<=(*pskb)->nh.iph->ihl*4+sizeof(struct udphdr))
715 <= (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr))
718 /* Insert other cases here as desired */
722 nskb = skb_copy(*pskb, GFP_ATOMIC);
725 BUG_ON(skb_is_nonlinear(nskb));
727 /* Rest of kernel will get very unhappy if we pass it a
728 suddenly-orphaned skbuff */
730 skb_set_owner_w(nskb, (*pskb)->sk);
736 return pskb_may_pull(*pskb, writable_len);
738 EXPORT_SYMBOL(skb_ip_make_writable);
739 #endif /*CONFIG_INET*/
741 /* Internal logging interface, which relies on the real
742 LOG target modules */
/* Size of the on-stack buffer nf_log_packet() formats its prefix into. */
744 #define NF_LOG_PREFIXLEN 128
/* One logging backend pointer per protocol family; a NULL slot means no
 * backend is registered for that family. */
746 static nf_logfn *nf_logging[NPROTO]; /* = NULL */
/* Tested by nf_log_packet() before it prints its "no backend" warning. */
747 static int reported = 0;
/* Spinlock held while an nf_logging[] slot is updated. */
748 static spinlock_t nf_log_lock = SPIN_LOCK_UNLOCKED;
/* Register logfn as the logging backend for protocol family pf.
 * Under nf_log_lock, the slot is filled only if it is currently empty; an
 * existing backend is left in place.
 * NOTE(review): the return value for either outcome is not visible in
 * this listing. */
750 int nf_log_register(int pf, nf_logfn *logfn)
754 /* Any setup of logging members must be done before
755 * substituting pointer. */
757 spin_lock(&nf_log_lock);
758 if (!nf_logging[pf]) {
759 nf_logging[pf] = logfn;
762 spin_unlock(&nf_log_lock);
/* Unregister a logging backend.
 * Under nf_log_lock, the slot for pf is cleared only if it still points
 * at logfn, so a different backend's registration is not removed.
 * NOTE(review): the statement following the "Give time to concurrent
 * readers" comment is not visible in this listing. */
766 void nf_log_unregister(int pf, nf_logfn *logfn)
768 spin_lock(&nf_log_lock);
769 if (nf_logging[pf] == logfn)
770 nf_logging[pf] = NULL;
771 spin_unlock(&nf_log_lock);
773 /* Give time to concurrent readers. */
/* Log a packet through the backend registered for pf.
 * The backend pointer is read from nf_logging[pf] without taking
 * nf_log_lock.  When a backend is present, fmt and its variadic
 * arguments are formatted into a NF_LOG_PREFIXLEN-byte stack buffer
 * (vsnprintf truncates anything longer) and the backend is called with
 * (hooknum, skb, in, out, prefix).  When no backend is present and
 * `reported` is still zero, a KERN_WARNING is printed.
 * NOTE(review): the va_start/va_end handling, the RCU read-side markers
 * and the update of `reported` are not visible in this listing. */
777 void nf_log_packet(int pf,
778 unsigned int hooknum,
779 const struct sk_buff *skb,
780 const struct net_device *in,
781 const struct net_device *out,
782 const char *fmt, ...)
785 char prefix[NF_LOG_PREFIXLEN];
789 logfn = nf_logging[pf];
792 vsnprintf(prefix, sizeof(prefix), fmt, args);
794 /* We must read logging before nf_logfn[pf] */
795 smp_read_barrier_depends();
796 logfn(hooknum, skb, in, out, prefix);
797 } else if (!reported) {
798 printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
799 "no backend logging module loaded in!\n");
804 EXPORT_SYMBOL(nf_log_register);
805 EXPORT_SYMBOL(nf_log_unregister);
806 EXPORT_SYMBOL(nf_log_packet);
808 /* This does not belong here, but ipt_REJECT needs it if connection
809 tracking in use: without this, connection may not be in hash table,
810 and hence manufactured ICMP or RST packets will not be associated with it. */
/* Exported function pointer taking an skb and an nf_ct_info.  It has no
 * initializer here and nothing in this file assigns it; presumably the
 * connection tracking code sets it -- confirm against its users. */
812 void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
/* Boot-time initialisation: make every nf_hooks[pf][hook] list an empty
 * list head, for all NPROTO families and NF_MAX_HOOKS hook numbers.
 * NOTE(review): the declarations of `i` and `h` are not visible in this
 * listing. */
814 void __init netfilter_init(void)
818 for (i = 0; i < NPROTO; i++) {
819 for (h = 0; h < NF_MAX_HOOKS; h++)
820 INIT_LIST_HEAD(&nf_hooks[i][h]);
824 EXPORT_SYMBOL(ip_ct_attach);
825 EXPORT_SYMBOL(ip_route_me_harder);
826 EXPORT_SYMBOL(nf_getsockopt);
827 EXPORT_SYMBOL(nf_hook_slow);
828 EXPORT_SYMBOL(nf_hooks);
829 EXPORT_SYMBOL(nf_register_hook);
830 EXPORT_SYMBOL(nf_register_queue_handler);
831 EXPORT_SYMBOL(nf_register_sockopt);
832 EXPORT_SYMBOL(nf_reinject);
833 EXPORT_SYMBOL(nf_setsockopt);
834 EXPORT_SYMBOL(nf_unregister_hook);
835 EXPORT_SYMBOL(nf_unregister_queue_handler);
836 EXPORT_SYMBOL(nf_unregister_sockopt);
837 #ifdef CONFIG_NETFILTER_DEBUG
838 EXPORT_SYMBOL(nf_dump_skb);